From 72dfec46cbfd522d4b8983e81dde5351f7788634 Mon Sep 17 00:00:00 2001
From: Rita Aleksziev
Date: Tue, 28 Jan 2025 20:00:32 +0100
Subject: [PATCH] Add metrics to metric table

---
 cognee/api/v1/cognify/cognify_v2.py         |  2 +
 cognee/modules/data/models/MetricData.py    | 14 +++----
 cognee/tasks/storage/descriptive_metrics.py | 46 +++++++++++++++++++++
 3 files changed, 55 insertions(+), 7 deletions(-)
 create mode 100644 cognee/tasks/storage/descriptive_metrics.py

diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py
index 12a84030d..48d46417f 100644
--- a/cognee/api/v1/cognify/cognify_v2.py
+++ b/cognee/api/v1/cognify/cognify_v2.py
@@ -25,6 +25,7 @@
 )
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
+from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics
 from cognee.tasks.storage.index_graph_edges import index_graph_edges
 from cognee.tasks.summarization import summarize_text
@@ -164,6 +165,7 @@ async def get_default_tasks(
                 task_config={"batch_size": 10},
             ),
             Task(add_data_points, only_root=True, task_config={"batch_size": 10}),
+            Task(store_descriptive_metrics),
         ]
     except Exception as error:
         send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id)
diff --git a/cognee/modules/data/models/MetricData.py b/cognee/modules/data/models/MetricData.py
index ef6c33d1d..4f1b91e1b 100644
--- a/cognee/modules/data/models/MetricData.py
+++ b/cognee/modules/data/models/MetricData.py
@@ -11,13 +11,13 @@ class GraphMetricData(Base):
     # TODO: Change ID to reflect unique id of graph database
     id = Column(UUID, primary_key=True, default=uuid4)
-    num_tokens = Column(Integer)
-    num_nodes = Column(Integer)
-    num_edges = Column(Integer)
-    mean_degree = Column(Float)
-    edge_density = Column(Float)
-    num_connected_components = Column(Integer)
-    sizes_of_connected_components = Column(ARRAY(Integer))
+    num_tokens = Column(Integer, nullable=True)
+    num_nodes = Column(Integer, nullable=True)
+    num_edges = Column(Integer, nullable=True)
+    mean_degree = Column(Float, nullable=True)
+    edge_density = Column(Float, nullable=True)
+    num_connected_components = Column(Integer, nullable=True)
+    sizes_of_connected_components = Column(ARRAY(Integer), nullable=True)
     num_selfloops = Column(Integer, nullable=True)
     diameter = Column(Integer, nullable=True)
     avg_shortest_path_length = Column(Float, nullable=True)
diff --git a/cognee/tasks/storage/descriptive_metrics.py b/cognee/tasks/storage/descriptive_metrics.py
new file mode 100644
index 000000000..5c1f710a6
--- /dev/null
+++ b/cognee/tasks/storage/descriptive_metrics.py
@@ -0,0 +1,46 @@
+from cognee.infrastructure.engine import DataPoint
+from cognee.modules.data.processing.document_types import Document
+from cognee.infrastructure.databases.relational import get_relational_engine
+from sqlalchemy import select
+from cognee.modules.data.models import Data
+from cognee.modules.data.models.MetricData import GraphMetricData
+import uuid
+from cognee.infrastructure.databases.graph import get_graph_engine
+
+
+async def fetch_token_count(db_engine) -> int:
+    """
+    Fetches and sums token counts from the database.
+
+    Returns:
+        int: The total number of tokens across all documents.
+    """
+
+    async with db_engine.get_async_session() as session:
+        document_data_points = await session.execute(select(Data))
+        token_count_sum = sum(document.token_count for document in document_data_points.scalars())
+
+    return token_count_sum
+
+
+async def calculate_graph_metrics(graph_data):
+    nodes, edges = graph_data
+    graph_metrics = {
+        "num_nodes": len(nodes),
+        "num_edges": len(edges),
+    }
+    return graph_metrics
+
+
+async def store_descriptive_metrics(data_points: list[DataPoint]):
+    db_engine = get_relational_engine()
+    graph_engine = await get_graph_engine()
+    graph_data = await graph_engine.get_graph_data()
+
+    token_count_sum = await fetch_token_count(db_engine)
+    graph_metrics = await calculate_graph_metrics(graph_data)
+
+    table_name = "graph_metric_table"
+    metrics_dict = {"id": uuid.uuid4(), "num_tokens": token_count_sum} | graph_metrics
+
+    await db_engine.insert_data(table_name, metrics_dict)
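
Note for reviewers: the row that store_descriptive_metrics writes can be read back through the same relational engine the task uses. The snippet below is a minimal read-back sketch, not part of the patch; it assumes the cognify pipeline has already run and relies only on the APIs visible in the diff above (get_relational_engine, get_async_session, select, and the GraphMetricData columns).

# Minimal read-back sketch (not part of this patch); assumes the pipeline has run.
from sqlalchemy import select

from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models.MetricData import GraphMetricData


async def print_graph_metrics():
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        rows = await session.execute(select(GraphMetricData))
        for metrics in rows.scalars():
            # num_tokens comes from fetch_token_count; the node/edge counts
            # come from calculate_graph_metrics.
            print(metrics.id, metrics.num_tokens, metrics.num_nodes, metrics.num_edges)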
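
Possible follow-up, sketched purely as an illustration: calculate_graph_metrics currently fills only num_nodes and num_edges, while the model also declares nullable mean_degree and edge_density columns. Those could be derived from the same (nodes, edges) pair returned by graph_engine.get_graph_data(); the helper name below is hypothetical and the density formula assumes a simple directed graph.

# Hypothetical extension (not in this patch): fill two more nullable columns.
async def calculate_graph_metrics_extended(graph_data):
    nodes, edges = graph_data
    num_nodes = len(nodes)
    num_edges = len(edges)
    return {
        "num_nodes": num_nodes,
        "num_edges": num_edges,
        # Each edge contributes to the degree of two endpoints.
        "mean_degree": (2 * num_edges / num_nodes) if num_nodes else None,
        # Density for a simple directed graph; use n * (n - 1) / 2 in the
        # denominator if the edges are undirected.
        "edge_density": (num_edges / (num_nodes * (num_nodes - 1))) if num_nodes > 1 else None,
    }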