From 16139f74431f5ae2a35b3184af39b157112dbfb8 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 15 Jun 2024 11:37:52 +0200
Subject: [PATCH] Shiny new LLMOps

---
 cognee/api/v1/cognify/cognify.py              | 27 ++++++++++++++++++-
 cognee/api/v1/config/config.py                | 13 +++++++++
 .../infrastructure/databases/graph/config.py  |  5 ++--
 .../llm/prompts/extract_topology.txt          |  4 ++-
 .../modules/topology/infer_data_topology.py   |  2 +-
 5 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index 408b92e76..b8b2569c3 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -1,4 +1,5 @@
 import asyncio
+import os
 from uuid import uuid4
 from typing import List, Union
 import logging
@@ -149,15 +150,23 @@ async def process_batch(files_batch):
 
     graph_config = get_graph_config()
     graph_topology = graph_config.graph_model
+    print(graph_config.infer_graph_topology)
+    print(graph_config.graph_topology_task)
+
+
     if graph_config.infer_graph_topology and graph_config.graph_topology_task:
         from cognee.modules.topology.topology import TopologyEngine
         topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology)
         await topology_engine.add_graph_topology(dataset_files=dataset_files)
+        print('inferred topology added')
+        parent_node_id = None
 
     elif not graph_config.infer_graph_topology:
         from cognee.modules.topology.topology import TopologyEngine
         topology_engine = TopologyEngine(infer=graph_config.infer_graph_topology)
         await topology_engine.add_graph_topology(graph_config.topology_file_path)
+        print('provided topology added')
+        parent_node_id = None
 
     elif not graph_config.graph_topology_task:
         parent_node_id = f"DefaultGraphModel__{USER_ID}"
@@ -294,14 +303,30 @@ async def test():
 
     from cognee.api.v1.add import add
 
+    dataset_name = "explanations"
+    print(os.getcwd())
+    data_dir = os.path.abspath("../../../.data")
+    print(os.getcwd())
+    from pathlib import Path
+    dir = Path.joinpath(Path.cwd(), ".data")
+
+    await add(f"data://{dir}", dataset_name="explanations")
+
     await add([text], "example_dataset")
 
     from cognee.api.v1.config.config import config
     config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE )
     config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER)
     config.embedding_engine = LiteLLMEmbeddingEngine()
+    config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE)
+    config.set_graph_topology_task(True)
+    config.set_infer_graph_topology(True)
+
+    from cognee.api.v1.datasets.datasets import datasets
+    print(datasets.list_datasets())
+
+    graph = await cognify("explanations")
 
-    graph = await cognify()
     # vector_client = infrastructure_config.get_config("vector_engine")
     #
     # out = await vector_client.search(collection_name ="basic_rag", query_text="show_all_processes", limit=10)
diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py
index dd8e4e53e..854b95560 100644
--- a/cognee/api/v1/config/config.py
+++ b/cognee/api/v1/config/config.py
@@ -132,5 +132,18 @@ def set_graphistry_password(graphistry_password: str):
         base_config = get_base_config()
         base_config.graphistry_password = graphistry_password
 
+    @staticmethod
+    def set_graph_topology_task(graph_topology_task: bool):
+        base_config = get_graph_config()
+        base_config.graph_topology_task = graph_topology_task
+
+    @staticmethod
+    def set_infer_graph_topology(infer_graph_topology: bool):
+        base_config = get_graph_config()
+        base_config.infer_graph_topology = infer_graph_topology
+    @staticmethod
+    def set_topology_file_path(topology_file_path: str):
+        base_config = get_graph_config()
+        base_config.topology_file_path = topology_file_path
 
 
diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py
index d20d997eb..e048f48ae 100644
--- a/cognee/infrastructure/databases/graph/config.py
+++ b/cognee/infrastructure/databases/graph/config.py
@@ -19,8 +19,8 @@ class GraphConfig(BaseSettings):
         graph_filename
     )
     graph_model: object = KnowledgeGraph
-    graph_topology_task: bool = False
-    graph_topology: object = KnowledgeGraph
+    graph_topology_task: bool = True
+    graph_topology_model: object = KnowledgeGraph
     infer_graph_topology: bool = True
     topology_file_path: str = os.path.join(
         os.path.join(get_absolute_path(".cognee_system"), "databases"),
@@ -39,6 +39,7 @@ def to_dict(self) -> dict:
             "graph_database_password": self.graph_database_password,
             "graph_database_port": self.graph_database_port,
             "infer_graph_topology": self.infer_graph_topology,
+            "graph_topology_task": self.graph_topology_task,
         }
 
 
diff --git a/cognee/infrastructure/llm/prompts/extract_topology.txt b/cognee/infrastructure/llm/prompts/extract_topology.txt
index 06065f5a6..a9cabee06 100644
--- a/cognee/infrastructure/llm/prompts/extract_topology.txt
+++ b/cognee/infrastructure/llm/prompts/extract_topology.txt
@@ -1,4 +1,6 @@
 You are a topology master and need to extract the following topology information from the text provided to you.
 Relationship parts can't be empty, and have to be logical AND CONNECTING ELEMENTS OF THE TOPOLOGY
 The source is the parent of the target. And the target is the child of the source.
-Have in mind this model needs to become a graph later, and USE EXISTING IDS AS NODE IDS
\ No newline at end of file
+Have in mind this model needs to become a graph later, and USE EXISTING IDS AS NODE IDS
+You are just connecting documents; you don't need to decompose the documents into words or anything like that.
+Use the document id as the name if a name is not present
\ No newline at end of file
diff --git a/cognee/modules/topology/infer_data_topology.py b/cognee/modules/topology/infer_data_topology.py
index ada7ea498..2af63e175 100644
--- a/cognee/modules/topology/infer_data_topology.py
+++ b/cognee/modules/topology/infer_data_topology.py
@@ -7,7 +7,7 @@ async def infer_data_topology(content: str, graph_topology=None):
 
     if graph_topology is None:
         graph_config = get_graph_config()
-        graph_topology = graph_config.graph_topology
+        graph_topology = graph_config.graph_topology_model
 
     print("content: ", type(content))
     try:
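
Reviewer note, not part of the patch to apply: a minimal usage sketch of the topology
configuration introduced above. It assumes cognify() and add() are importable from the
patched modules (cognee.api.v1.cognify.cognify and cognee.api.v1.add); the dataset name,
sample text, and topology file path are illustrative only.

    import asyncio

    from cognee.api.v1.add import add
    from cognee.api.v1.cognify.cognify import cognify
    from cognee.api.v1.config.config import config

    async def main():
        # Enable the topology step and let the TopologyEngine infer the
        # document-level topology from the dataset files.
        config.set_graph_topology_task(True)
        config.set_infer_graph_topology(True)

        # Alternative: supply a prebuilt topology instead of inferring one
        # (hypothetical path, adjust to your layout).
        # config.set_infer_graph_topology(False)
        # config.set_topology_file_path("topology.json")

        # Add some content, then build the knowledge graph for that dataset.
        await add(["Example document text."], "explanations")
        await cognify("explanations")

    asyncio.run(main())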