From 1981adae070a3e1c61bab883898fca0a87b7b173 Mon Sep 17 00:00:00 2001 From: Aristo <6344553+randombet@users.noreply.github.com> Date: Tue, 20 Aug 2024 08:24:53 +0000 Subject: [PATCH 1/5] [Graph RAG] Init Commit with GraphRag interfaces --- .../agentchat/contrib/graph_rag/__init__.py | 0 .../contrib/graph_rag/graph_rag_agent.py | 44 +++++++++++++++++++ .../contrib/graph_rag/graph_store.py | 25 +++++++++++ 3 files changed, 69 insertions(+) create mode 100644 autogen/agentchat/contrib/graph_rag/__init__.py create mode 100644 autogen/agentchat/contrib/graph_rag/graph_rag_agent.py create mode 100644 autogen/agentchat/contrib/graph_rag/graph_store.py diff --git a/autogen/agentchat/contrib/graph_rag/__init__.py b/autogen/agentchat/contrib/graph_rag/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py new file mode 100644 index 00000000000..b34913910a9 --- /dev/null +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py @@ -0,0 +1,44 @@ +from abc import ABC, abstractmethod +from typing import List + +from autogen.agentchat import ConversableAgent + +from .graph_store import GraphStore + + +class GraphRagAgent(ConversableAgent, ABC): + """ + A graph rag agent is a conversable agent which could query graph database for answers. + + An implementing agent class would + 1. create a graph in the underlying database with input documents + 2. use the retrieve() method to retrieve information. + 3. use the retrieved information to generate and send back messages. + """ + + @abstractmethod + def _init_db(self, input_doc: List | None = None) -> GraphStore: + """ + This method initializes graph database with the input documents or records. + Usually, it takes the following steps, + 1. connecting to a graph database. + 2. extract graph nodes, edges based on input data, graph schema and etc. + 3. build indexes etc. 
+ + return: GraphStore + """ + pass + + @abstractmethod + def retrieve(self, question: str, **kwargs): + """ + Retrieve answers with human readable questions. + """ + pass + + @abstractmethod + def add_records(self, new_records: List) -> bool: + """ + Add new records to the underlying database and add to the graph if required. + """ + pass diff --git a/autogen/agentchat/contrib/graph_rag/graph_store.py b/autogen/agentchat/contrib/graph_rag/graph_store.py new file mode 100644 index 00000000000..75e78fb53eb --- /dev/null +++ b/autogen/agentchat/contrib/graph_rag/graph_store.py @@ -0,0 +1,25 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass + + +@dataclass +class GraphStoreQueryResult: + """ + A wrapper of graph store query results. + """ + + answer: str + + +class GraphStore(ABC): + """An abstract base class that represents a underlying graph database. + + This interface defines the basic methods which are required by implementing graph rag from graph database. + """ + + @abstractmethod + def query(self, question: str, **kwargs) -> GraphStoreQueryResult: + """ + This method transform a string format question into database query and return the result. 
+ """ + pass From bbb056ca3d9f5cda7205462928cb57dce28d6986 Mon Sep 17 00:00:00 2001 From: Aristo <6344553+randombet@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:38:58 +0000 Subject: [PATCH 2/5] Add GraphRAG document class --- .../agentchat/contrib/graph_rag/document.py | 24 ++++++++++ .../contrib/graph_rag/graph_rag_agent.py | 48 +++++++++++++++---- .../contrib/graph_rag/graph_store.py | 13 +++-- 3 files changed, 72 insertions(+), 13 deletions(-) create mode 100644 autogen/agentchat/contrib/graph_rag/document.py diff --git a/autogen/agentchat/contrib/graph_rag/document.py b/autogen/agentchat/contrib/graph_rag/document.py new file mode 100644 index 00000000000..9730269c7ab --- /dev/null +++ b/autogen/agentchat/contrib/graph_rag/document.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from enum import Enum, auto +from typing import Optional + + +class DocumentType(Enum): + """ + Enum for supporting document type. + """ + + TEXT = auto() + HTML = auto() + PDF = auto() + + +@dataclass +class Document: + """ + A wrapper of an input document used to build the graph. + """ + + doctype: DocumentType + data: Optional[object] = None + path_or_url: Optional[str] = "" diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py index b34913910a9..dfe16709130 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py @@ -1,12 +1,12 @@ -from abc import ABC, abstractmethod -from typing import List +from typing import List, Protocol from autogen.agentchat import ConversableAgent +from .document import Document from .graph_store import GraphStore -class GraphRagAgent(ConversableAgent, ABC): +class GraphRagAgent(ConversableAgent, Protocol): """ A graph rag agent is a conversable agent which could query graph database for answers. @@ -14,10 +14,41 @@ class GraphRagAgent(ConversableAgent, ABC): 1. 
create a graph in the underlying database with input documents 2. use the retrieve() method to retrieve information. 3. use the retrieved information to generate and send back messages. + + For example, + graph_rag_agent = GraphRagAgent( + name="movie knowledge graph agent", + human_input_mode="NEVER", + max_consecutive_auto_reply=3, + retrieve_config={ + "docs_path": [ + "./data/movies.txt", + ], + "llm_config" = autogen.config_list_from_json("OAI_CONFIG_LIST") + "database_config" = { + "host": "127.0.0.1", + "port": 6379, + "table_name": "movies" + } + }, + ) + + # initialize database (internally) + # self._init_db(input_doc=[Document(doc) for doc in retrieve_config["docs_path"]]) + + question = "Name a few actors who've played in 'The Matrix'" + + answer = graph_rag_agent.retrieve(question) + + # answer: + # A few actors who have played in 'The Matrix' are: + # - Keanu Reeves + # - Laurence Fishburne + # - Carrie-Anne Moss + # - Hugo Weaving """ - @abstractmethod - def _init_db(self, input_doc: List | None = None) -> GraphStore: + def _init_db(self, input_doc: List[Document] | None = None) -> GraphStore: """ This method initializes graph database with the input documents or records. Usually, it takes the following steps, @@ -25,18 +56,19 @@ def _init_db(self, input_doc: List | None = None) -> GraphStore: 2. extract graph nodes, edges based on input data, graph schema and etc. 3. build indexes etc. - return: GraphStore + Args: + input_doc: a list of input documents that are used to build the graph in database. + + Returns: GraphStore """ pass - @abstractmethod def retrieve(self, question: str, **kwargs): """ Retrieve answers with human readable questions. """ pass - @abstractmethod def add_records(self, new_records: List) -> bool: """ Add new records to the underlying database and add to the graph if required. 
diff --git a/autogen/agentchat/contrib/graph_rag/graph_store.py b/autogen/agentchat/contrib/graph_rag/graph_store.py index 75e78fb53eb..88dfcd47e58 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_store.py +++ b/autogen/agentchat/contrib/graph_rag/graph_store.py @@ -1,24 +1,27 @@ -from abc import ABC, abstractmethod from dataclasses import dataclass +from typing import List, Optional, Protocol @dataclass class GraphStoreQueryResult: """ A wrapper of graph store query results. + + answer: human readable answer to question/query. + results: intermediate results to question/query, e.g. node entities. """ - answer: str + answer: Optional[str] = None + results: Optional[List] = [] -class GraphStore(ABC): +class GraphStore(Protocol): """An abstract base class that represents a underlying graph database. This interface defines the basic methods which are required by implementing graph rag from graph database. """ - @abstractmethod - def query(self, question: str, **kwargs) -> GraphStoreQueryResult: + def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: """ This method transform a string format question into database query and return the result. 
""" From fb1b53127e6bebd26950943335f9a45f427e0847 Mon Sep 17 00:00:00 2001 From: Aristo <6344553+randombet@users.noreply.github.com> Date: Tue, 27 Aug 2024 18:22:08 +0000 Subject: [PATCH 3/5] Add example to use initiate_chat --- .../contrib/graph_rag/graph_rag_agent.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py index dfe16709130..fb893960332 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py @@ -17,8 +17,7 @@ class GraphRagAgent(ConversableAgent, Protocol): For example, graph_rag_agent = GraphRagAgent( - name="movie knowledge graph agent", - human_input_mode="NEVER", + name="graph_rag_agent", max_consecutive_auto_reply=3, retrieve_config={ "docs_path": [ @@ -36,16 +35,26 @@ class GraphRagAgent(ConversableAgent, Protocol): # initialize database (internally) # self._init_db(input_doc=[Document(doc) for doc in retrieve_config["docs_path"]]) - question = "Name a few actors who've played in 'The Matrix'" + user_proxy = UserProxyAgent( + name="user_proxy", + code_execution_config=False, + is_termination_msg=lambda msg: "TERMINATE" in msg["content"], + human_input_mode="ALWAYS", + ) + user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'") - answer = graph_rag_agent.retrieve(question) + # ChatResult( + # chat_id=None, + # chat_history=[ + # {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'}, + # {'content': 'A few actors who have played in The Matrix are: + # - Keanu Reeves + # - Laurence Fishburne + # - Carrie-Anne Moss + # - Hugo Weaving', + # 'role': 'user_proxy'}, + # ...) 
- # answer: - # A few actors who have played in 'The Matrix' are: - # - Keanu Reeves - # - Laurence Fishburne - # - Carrie-Anne Moss - # - Hugo Weaving """ def _init_db(self, input_doc: List[Document] | None = None) -> GraphStore: @@ -63,12 +72,6 @@ def _init_db(self, input_doc: List[Document] | None = None) -> GraphStore: """ pass - def retrieve(self, question: str, **kwargs): - """ - Retrieve answers with human readable questions. - """ - pass - def add_records(self, new_records: List) -> bool: """ Add new records to the underlying database and add to the graph if required. From 23aace269ce1a86599e640f7ad13511158722167 Mon Sep 17 00:00:00 2001 From: Aristo <6344553+randombet@users.noreply.github.com> Date: Wed, 28 Aug 2024 23:32:31 +0000 Subject: [PATCH 4/5] Refactor graph store to graph query engine --- .../contrib/graph_rag/graph_query_engine.py | 51 +++++++++++++++++++ .../contrib/graph_rag/graph_rag_agent.py | 43 +++------------- .../contrib/graph_rag/graph_store.py | 28 ---------- 3 files changed, 59 insertions(+), 63 deletions(-) create mode 100644 autogen/agentchat/contrib/graph_rag/graph_query_engine.py delete mode 100644 autogen/agentchat/contrib/graph_rag/graph_store.py diff --git a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py new file mode 100644 index 00000000000..3352b5a32cd --- /dev/null +++ b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py @@ -0,0 +1,51 @@ +from dataclasses import dataclass +from typing import List, Optional, Protocol + +from .document import Document + + +@dataclass +class GraphStoreQueryResult: + """ + A wrapper of graph store query results. + + answer: human readable answer to question/query. + results: intermediate results to question/query, e.g. node entities. 
+ """ + + answer: Optional[str] = None + results: Optional[List] = [] + + +class GraphQueryEngine(Protocol): + """An abstract base class that represents a graph query engine on top of an underlying graph database. + + This interface defines the basic methods for graph rag. + """ + + def init_db(self, input_doc: List[Document] | None = None): + """ + This method initializes graph database with the input documents or records. + Usually, it takes the following steps, + 1. connecting to a graph database. + 2. extract graph nodes, edges based on input data, graph schema and etc. + 3. build indexes etc. + + Args: + input_doc: a list of input documents that are used to build the graph in database. + + Returns: GraphStore + """ + pass + + def add_records(self, new_records: List) -> bool: + """ + Add new records to the underlying database and add to the graph if required. + """ + pass + + def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: + """ + This method transforms a string format question into a database query and returns the result. + """ + pass diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py index fb893960332..b62f5ec79dd 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py @@ -2,8 +2,7 @@ from autogen.agentchat import ConversableAgent -from .document import Document -from .graph_store import GraphStore +from .graph_query_engine import GraphQueryEngine class GraphRagAgent(ConversableAgent, Protocol): @@ -16,24 +15,15 @@ class GraphRagAgent(ConversableAgent, Protocol): 3. use the retrieved information to generate and send back messages. For example, + graph_query_engine = GraphQueryEngine(...) 
+ graph_query_engine.init_db([Document(doc1), Document(doc2), ...]) + graph_rag_agent = GraphRagAgent( name="graph_rag_agent", max_consecutive_auto_reply=3, - retrieve_config={ - "docs_path": [ - "./data/movies.txt", - ], - "llm_config" = autogen.config_list_from_json("OAI_CONFIG_LIST") - "database_config" = { - "host": "127.0.0.1", - "port": 6379, - "table_name": "movies" - } - }, + ... ) - - # initialize database (internally) - # self._init_db(input_doc=[Document(doc) for doc in retrieve_config["docs_path"]]) + graph_rag_agent.attach_graph_query_engine(graph_query_engine) user_proxy = UserProxyAgent( name="user_proxy", @@ -57,23 +47,6 @@ class GraphRagAgent(ConversableAgent, Protocol): """ - def _init_db(self, input_doc: List[Document] | None = None) -> GraphStore: - """ - This method initializes graph database with the input documents or records. - Usually, it takes the following steps, - 1. connecting to a graph database. - 2. extract graph nodes, edges based on input data, graph schema and etc. - 3. build indexes etc. - - Args: - input_doc: a list of input documents that are used to build the graph in database. - - Returns: GraphStore - """ - pass - - def add_records(self, new_records: List) -> bool: - """ - Add new records to the underlying database and add to the graph if required. - """ + def attach_graph_query_engine(self, graph_query_engine: GraphQueryEngine): + """Add a graph query engine to the agent.""" pass diff --git a/autogen/agentchat/contrib/graph_rag/graph_store.py b/autogen/agentchat/contrib/graph_rag/graph_store.py deleted file mode 100644 index 88dfcd47e58..00000000000 --- a/autogen/agentchat/contrib/graph_rag/graph_store.py +++ /dev/null @@ -1,28 +0,0 @@ -from dataclasses import dataclass -from typing import List, Optional, Protocol - - -@dataclass -class GraphStoreQueryResult: - """ - A wrapper of graph store query results. - - answer: human readable answer to question/query. - results: intermediate results to question/query, e.g. 
node entities. - """ - - answer: Optional[str] = None - results: Optional[List] = [] - - -class GraphStore(Protocol): - """An abstract base class that represents a underlying graph database. - - This interface defines the basic methods which are required by implementing graph rag from graph database. - """ - - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: - """ - This method transform a string format question into database query and return the result. - """ - pass From c9a1ad07ecbfb226d4fe43673d64eca2d4fe3ee4 Mon Sep 17 00:00:00 2001 From: Aristo <6344553+randombet@users.noreply.github.com> Date: Wed, 4 Sep 2024 03:05:37 +0000 Subject: [PATCH 5/5] Add Graph RAG Capability --- .../contrib/graph_rag/graph_query_engine.py | 4 +-- ...h_rag_agent.py => graph_rag_capability.py} | 32 +++++++++++-------- .../contrib/graph_rag/test_graph_rag_basic.py | 17 ++++++++++ 3 files changed, 37 insertions(+), 16 deletions(-) rename autogen/agentchat/contrib/graph_rag/{graph_rag_agent.py => graph_rag_capability.py} (55%) create mode 100644 test/agentchat/contrib/graph_rag/test_graph_rag_basic.py diff --git a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py index 3352b5a32cd..28ef6ede84a 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py +++ b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Protocol from .document import Document @@ -14,7 +14,7 @@ class GraphStoreQueryResult: """ answer: Optional[str] = None - results: Optional[List] = [] + results: list = field(default_factory=list) class GraphQueryEngine(Protocol): diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py b/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py similarity index 55% rename from 
autogen/agentchat/contrib/graph_rag/graph_rag_agent.py rename to autogen/agentchat/contrib/graph_rag/graph_rag_capability.py index b62f5ec79dd..b6412305e06 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_agent.py +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py @@ -1,29 +1,29 @@ -from typing import List, Protocol - -from autogen.agentchat import ConversableAgent +from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability +from autogen.agentchat.conversable_agent import ConversableAgent from .graph_query_engine import GraphQueryEngine -class GraphRagAgent(ConversableAgent, Protocol): +class GraphRagCapability(AgentCapability): """ - A graph rag agent is a conversable agent which could query graph database for answers. + A graph rag capability uses a graph query engine to give a conversable agent the graph rag ability. - An implementing agent class would - 1. create a graph in the underlying database with input documents - 2. use the retrieve() method to retrieve information. - 3. use the retrieved information to generate and send back messages. + An agent class with graph rag capability could + 1. create a graph in the underlying database with input documents. + 2. retrieve relevant information based on messages received by the agent. + 3. generate answers from retrieved information and send messages back. For example, graph_query_engine = GraphQueryEngine(...) graph_query_engine.init_db([Document(doc1), Document(doc2), ...]) - graph_rag_agent = GraphRagAgent( + graph_rag_agent = ConversableAgent( name="graph_rag_agent", max_consecutive_auto_reply=3, ... 
+ ) - graph_rag_agent.attach_graph_query_engine(graph_query_engine) + graph_rag_capability = GraphRagCapability(graph_query_engine) + graph_rag_capability.add_to_agent(graph_rag_agent) user_proxy = UserProxyAgent( name="user_proxy", @@ -47,6 +47,10 @@ class GraphRagAgent(ConversableAgent, Protocol): """ - def attach_graph_query_engine(self, graph_query_engine: GraphQueryEngine): - """Add a graph query engine to the agent.""" - pass + def __init__(self, query_engine: GraphQueryEngine): + """ + Initialize graph rag capability with a graph query engine. + """ + ... + + def add_to_agent(self, agent: ConversableAgent): ... diff --git a/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py b/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py new file mode 100644 index 00000000000..7c4a5094947 --- /dev/null +++ b/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py @@ -0,0 +1,17 @@ +from unittest.mock import Mock + +from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphQueryEngine +from autogen.agentchat.contrib.graph_rag.graph_rag_capability import GraphRagCapability +from autogen.agentchat.conversable_agent import ConversableAgent + + +def test_dry_run(): + """Dry run for basic graph rag objects.""" + mock_graph_query_engine = Mock(spec=GraphQueryEngine) + + graph_rag_agent = ConversableAgent( + name="graph_rag_agent", + max_consecutive_auto_reply=3, + ) + graph_rag_capability = GraphRagCapability(mock_graph_query_engine) + graph_rag_capability.add_to_agent(graph_rag_agent)