From d85aa1bd4d02723cc689f3003c4db8ca3069c4fd Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:42:53 +0100 Subject: [PATCH 1/5] feat: make tasks a configurable argument in the cognify function --- cognee/api/v1/cognify/cognify_v2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 738f77c5..7f36930f 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -160,7 +160,6 @@ async def get_default_tasks( summarization_model=cognee_config.summarization_model, task_config={"batch_size": 10}, ), - Task(add_data_points, only_root=True, task_config={"batch_size": 10}), ] except Exception as error: send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id) From 6e708547f441dd5e6c90651453f5257f141e8bbb Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Wed, 15 Jan 2025 18:27:33 +0100 Subject: [PATCH 2/5] fix: add data points task --- cognee/api/v1/cognify/cognify_v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognee/api/v1/cognify/cognify_v2.py b/cognee/api/v1/cognify/cognify_v2.py index 7f36930f..738f77c5 100644 --- a/cognee/api/v1/cognify/cognify_v2.py +++ b/cognee/api/v1/cognify/cognify_v2.py @@ -160,6 +160,7 @@ async def get_default_tasks( summarization_model=cognee_config.summarization_model, task_config={"batch_size": 10}, ), + Task(add_data_points, only_root=True, task_config={"batch_size": 10}), ] except Exception as error: send_telemetry("cognee.cognify DEFAULT TASKS CREATION ERRORED", user.id) From 8b60ba1fa957074a68018374629461b0519c6eb7 Mon Sep 17 00:00:00 2001 From: Rita Aleksziev Date: Thu, 23 Jan 2025 14:56:11 +0100 Subject: [PATCH 3/5] Define SQLAlchemy models for descriptive graph metrics and input metrics --- cognee/modules/data/models/MetricData.py | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 
cognee/modules/data/models/MetricData.py diff --git a/cognee/modules/data/models/MetricData.py b/cognee/modules/data/models/MetricData.py new file mode 100644 index 00000000..98d36d33 --- /dev/null +++ b/cognee/modules/data/models/MetricData.py @@ -0,0 +1,57 @@ +from datetime import datetime, timezone + +from sqlalchemy import Column, DateTime, Float, Integer, ARRAY + +from cognee.infrastructure.databases.relational import Base + + +class GraphMetricData(Base): + __tablename__ = "graph_metric_table" + + num_nodes = Column(Integer) + num_edges = Column(Integer) + mean_degree = Column(Float) + edge_density = Column(Float) + num_connected_components = Column(Integer) + sizes_of_connected_components = Column(ARRAY(Integer)) + num_selfloops = Column(Integer, nullable=True) + diameter = Column(Integer, nullable=True) + avg_shortest_path_length = Column(Float, nullable=True) + avg_clustering = Column(Float, nullable=True) + + created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) + + def to_json(self) -> dict: + return { + "num_nodes": self.num_nodes, + "num_edges": self.num_edges, + "mean_degree": self.mean_degree, + "edge_density": self.edge_density, + "num_connected_components": self.num_connected_components, + "sizes_of_connected_components": self.sizes_of_connected_components, + "num_selfloops": self.num_selfloops if self.num_selfloops else None, + "diameter": self.diameter if self.diameter else None, + "avg_shortest_path_length": self.avg_shortest_path_length + if self.avg_shortest_path_length + else None, + "avg_clustering": self.avg_clustering if self.avg_clustering else None, + "createdAt": self.created_at.isoformat(), + "updatedAt": self.updated_at.isoformat() if self.updated_at else None, + } + + +class InputMetricData(Base): + __tablename__ = "input_metric_table" + + num_tokens = Column(Integer) + + created_at = 
Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) + + def to_json(self) -> dict: + return { + "num_tokens": self.num_tokens, + "createdAt": self.created_at.isoformat(), + "updatedAt": self.updated_at.isoformat() if self.updated_at else None, + } From bbc39a7ded9f23555351719472aaa8779b4f773c Mon Sep 17 00:00:00 2001 From: Rita Aleksziev Date: Sat, 25 Jan 2025 17:28:50 +0100 Subject: [PATCH 4/5] remove to_json method --- cognee/modules/data/models/MetricData.py | 25 ------------------------ 1 file changed, 25 deletions(-) diff --git a/cognee/modules/data/models/MetricData.py b/cognee/modules/data/models/MetricData.py index 98d36d33..3132852c 100644 --- a/cognee/modules/data/models/MetricData.py +++ b/cognee/modules/data/models/MetricData.py @@ -22,24 +22,6 @@ class GraphMetricData(Base): created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) - def to_json(self) -> dict: - return { - "num_nodes": self.num_nodes, - "num_edges": self.num_edges, - "mean_degree": self.mean_degree, - "edge_density": self.edge_density, - "num_connected_components": self.num_connected_components, - "sizes_of_connected_components": self.sizes_of_connected_components, - "num_selfloops": self.num_selfloops if self.num_selfloops else None, - "diameter": self.diameter if self.diameter else None, - "avg_shortest_path_length": self.avg_shortest_path_length - if self.avg_shortest_path_length - else None, - "avg_clustering": self.avg_clustering if self.avg_clustering else None, - "createdAt": self.created_at.isoformat(), - "updatedAt": self.updated_at.isoformat() if self.updated_at else None, - } - class InputMetricData(Base): __tablename__ = "input_metric_table" @@ -48,10 +30,3 @@ class InputMetricData(Base): created_at = Column(DateTime(timezone=True), 
default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) - - def to_json(self) -> dict: - return { - "num_tokens": self.num_tokens, - "createdAt": self.created_at.isoformat(), - "updatedAt": self.updated_at.isoformat() if self.updated_at else None, - } From 0c62101042985c8661daa69aa9d4aae2ac190c8a Mon Sep 17 00:00:00 2001 From: Rita Aleksziev Date: Mon, 27 Jan 2025 17:52:32 +0100 Subject: [PATCH 5/5] Use just one MetricData class instead of two --- cognee/modules/data/models/MetricData.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/cognee/modules/data/models/MetricData.py b/cognee/modules/data/models/MetricData.py index 3132852c..ef6c33d1 100644 --- a/cognee/modules/data/models/MetricData.py +++ b/cognee/modules/data/models/MetricData.py @@ -1,13 +1,17 @@ from datetime import datetime, timezone -from sqlalchemy import Column, DateTime, Float, Integer, ARRAY +from sqlalchemy import Column, DateTime, Float, Integer, ARRAY, UUID from cognee.infrastructure.databases.relational import Base +from uuid import uuid4 class GraphMetricData(Base): __tablename__ = "graph_metric_table" + # TODO: Change ID to reflect unique id of graph database + id = Column(UUID, primary_key=True, default=uuid4) + num_tokens = Column(Integer) num_nodes = Column(Integer) num_edges = Column(Integer) mean_degree = Column(Float) @@ -21,12 +25,3 @@ class GraphMetricData(Base): created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)) - - -class InputMetricData(Base): - __tablename__ = "input_metric_table" - - num_tokens = Column(Integer) - - created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)) - updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))