Skip to content

Commit

Permalink
Add metrics to metric table
Browse files Browse the repository at this point in the history
  • Loading branch information
alekszievr committed Jan 28, 2025
1 parent 9182be8 commit 72dfec4
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 7 deletions.
2 changes: 2 additions & 0 deletions cognee/api/v1/cognify/cognify_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.tasks.storage.descriptive_metrics import store_descriptive_metrics
from cognee.tasks.storage.index_graph_edges import index_graph_edges
from cognee.tasks.summarization import summarize_text

Expand Down Expand Up @@ -164,6 +165,7 @@ async def get_default_tasks(
task_config={"batch_size": 10},
),
Task(add_data_points, only_root=True, task_config={"batch_size": 10}),
Task(store_descriptive_metrics),
]
except Exception as error:
send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id)
Expand Down
14 changes: 7 additions & 7 deletions cognee/modules/data/models/MetricData.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ class GraphMetricData(Base):

# TODO: Change ID to reflect unique id of graph database
id = Column(UUID, primary_key=True, default=uuid4)
num_tokens = Column(Integer)
num_nodes = Column(Integer)
num_edges = Column(Integer)
mean_degree = Column(Float)
edge_density = Column(Float)
num_connected_components = Column(Integer)
sizes_of_connected_components = Column(ARRAY(Integer))
num_tokens = Column(Integer, nullable=True)
num_nodes = Column(Integer, nullable=True)
num_edges = Column(Integer, nullable=True)
mean_degree = Column(Float, nullable=True)
edge_density = Column(Float, nullable=True)
num_connected_components = Column(Integer, nullable=True)
sizes_of_connected_components = Column(ARRAY(Integer), nullable=True)
num_selfloops = Column(Integer, nullable=True)
diameter = Column(Integer, nullable=True)
avg_shortest_path_length = Column(Float, nullable=True)
Expand Down
46 changes: 46 additions & 0 deletions cognee/tasks/storage/descriptive_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from cognee.infrastructure.engine import DataPoint
from cognee.modules.data.processing.document_types import Document
from cognee.infrastructure.databases.relational import get_relational_engine
from sqlalchemy import select
from cognee.modules.data.models import Data
from cognee.modules.data.models.MetricData import GraphMetricData
import uuid
from cognee.infrastructure.databases.graph import get_graph_engine


async def fetch_token_count(db_engine) -> int:
    """
    Fetch every ``Data`` row and sum their token counts.

    Args:
        db_engine: Relational engine whose ``get_async_session()`` is used
            as an async context manager yielding a session.

    Returns:
        int: The total of ``token_count`` over all ``Data`` rows; 0 when
        there are no rows.
    """
    async with db_engine.get_async_session() as session:
        query_result = await session.execute(select(Data))
        # token_count may be None for rows where it was never populated
        # (assumes the column is nullable -- confirm against the Data model).
        # sum() would raise TypeError on None, so count such rows as 0.
        token_count_sum = sum(
            document.token_count or 0 for document in query_result.scalars()
        )

    return token_count_sum


async def calculate_graph_metrics(graph_data):
    """
    Derive size metrics from a graph snapshot.

    Args:
        graph_data: A two-item iterable ``(nodes, edges)``; both items must
            support ``len()``.

    Returns:
        dict: ``{"num_nodes": <node count>, "num_edges": <edge count>}``.
    """
    node_items, edge_items = graph_data
    return dict(num_nodes=len(node_items), num_edges=len(edge_items))


async def store_descriptive_metrics(data_points: list[DataPoint]):
    """
    Insert one row of descriptive graph metrics into the relational store.

    Reads the graph data from the graph engine, derives the node and edge
    counts from it, sums the token counts held in the relational database,
    and inserts the combined values under a fresh UUID into
    ``graph_metric_table``.

    Args:
        data_points: Not read by this function.
    """
    relational_engine = get_relational_engine()
    graph_db = await get_graph_engine()
    nodes_and_edges = await graph_db.get_graph_data()

    total_tokens = await fetch_token_count(relational_engine)
    size_metrics = await calculate_graph_metrics(nodes_and_edges)

    # Graph metrics are merged last, so they win on any key collision.
    metrics_row = {"id": uuid.uuid4(), "num_tokens": total_tokens, **size_metrics}
    await relational_engine.insert_data("graph_metric_table", metrics_row)

0 comments on commit 72dfec4

Please sign in to comment.