Merge branch 'dev' into COG-975
Vasilije1990 authored Jan 14, 2025
2 parents 047948a + 12031e6 commit 3ba98b2
Showing 19 changed files with 975 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dockerhub.yml
@@ -7,7 +7,7 @@ on:
 
 jobs:
   docker-build-and-push:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
 
     steps:
       - name: Checkout repository
2 changes: 1 addition & 1 deletion .github/workflows/py_lint.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: true
       matrix:
         os:
-          - ubuntu-22.04
+          - ubuntu-latest
         python-version: ["3.10.x", "3.11.x"]
 
     defaults:
1 change: 1 addition & 0 deletions .github/workflows/reusable_notebook.yml
@@ -51,6 +51,7 @@ jobs:
         env:
           ENV: 'dev'
           LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
           GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
         run: |
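The notebook step now exports OPENAI_API_KEY alongside LLM_API_KEY, both pointing at the same secret. Presumably cognee itself reads LLM_API_KEY while the llama-index/OpenAI client in the notebook reads the standard OPENAI_API_KEY; exporting both keeps either lookup working. A minimal sketch of that kind of fallback resolution (hypothetical, not repo code):

```
import os

# Hypothetical resolution order: prefer the cognee-specific variable, fall
# back to the standard OpenAI one. In this workflow both hold the same value.
api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise RuntimeError("No OpenAI API key configured")
```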
2 changes: 1 addition & 1 deletion .github/workflows/ruff_format.yaml
@@ -3,7 +3,7 @@ on: [ pull_request ]
 
 jobs:
   ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
      - uses: astral-sh/ruff-action@v2
2 changes: 1 addition & 1 deletion .github/workflows/ruff_lint.yaml
@@ -3,7 +3,7 @@ on: [ pull_request ]
 
 jobs:
   ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
       - uses: astral-sh/ruff-action@v2
2 changes: 1 addition & 1 deletion .github/workflows/test_deduplication.yml
@@ -16,7 +16,7 @@ env:
 jobs:
   run_deduplication_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash
20 changes: 20 additions & 0 deletions .github/workflows/test_llama_index_cognee_integration_notebook.yml
@@ -0,0 +1,20 @@
+name: test | llama index cognee integration notebook
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run_notebook_test:
+    uses: ./.github/workflows/reusable_notebook.yml
+    with:
+      notebook-location: notebooks/llama_index_cognee_integration.ipynb
+    secrets:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
2 changes: 1 addition & 1 deletion .github/workflows/test_qdrant.yml
@@ -17,7 +17,7 @@ jobs:
 
   run_qdrant_integration_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash
2 changes: 1 addition & 1 deletion .github/workflows/test_weaviate.yml
@@ -17,7 +17,7 @@ jobs:
 
   run_weaviate_integration_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash
12 changes: 3 additions & 9 deletions README.md
@@ -101,15 +101,9 @@ cognee.config.set_graphistry_config({
 })
 ```
 
-(Optional) To run the UI, go to cognee-frontend directory and run:
-```
-npm run dev
-```
-or run everything in a docker container:
-```
-docker-compose up
-```
-Then navigate to localhost:3000
+(Optional) To run Cognee with a UI, go to the cognee-mcp directory and follow the instructions.
+You will be able to use Cognee as an MCP tool to create graphs and query them.
+
 
 If you want to use Cognee with PostgreSQL, make sure to set the following values in the .env file:
 ```
3 changes: 2 additions & 1 deletion cognee-mcp/pyproject.toml
@@ -3,7 +3,8 @@ name = "cognee-mcp"
 version = "0.1.0"
 description = "A MCP server project"
 readme = "README.md"
-requires-python = ">=3.11.0"
+requires-python = ">=3.10"
+
 dependencies = [
     "mcp>=1.1.1",
     "openai==1.59.4",
641 changes: 637 additions & 4 deletions cognee-mcp/uv.lock

Large diffs are not rendered by default.

38 changes: 6 additions & 32 deletions cognee/modules/graph/cognee_graph/CogneeGraph.py
@@ -8,7 +8,7 @@
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
 from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
 import heapq
-from graphistry import edges
+import asyncio
 
 
 class CogneeGraph(CogneeAbstractGraph):
@@ -127,51 +127,25 @@ async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
             else:
                 print(f"Node with id {node_id} not found in the graph.")
 
-    async def map_vector_distances_to_graph_edges(
-        self, vector_engine, query
-    ) -> None:  # :TODO: When we calculate edge embeddings in vector db change this similarly to node mapping
+    async def map_vector_distances_to_graph_edges(self, vector_engine, query) -> None:
         try:
             # Step 1: Generate the query embedding
             query_vector = await vector_engine.embed_data([query])
             query_vector = query_vector[0]
             if query_vector is None or len(query_vector) == 0:
                 raise ValueError("Failed to generate query embedding.")
 
-            # Step 2: Collect all unique relationship types
-            unique_relationship_types = set()
-            for edge in self.edges:
-                relationship_type = edge.attributes.get("relationship_type")
-                if relationship_type:
-                    unique_relationship_types.add(relationship_type)
-
-            # Step 3: Embed all unique relationship types
-            unique_relationship_types = list(unique_relationship_types)
-            relationship_type_embeddings = await vector_engine.embed_data(unique_relationship_types)
-
-            # Step 4: Map relationship types to their embeddings and calculate distances
-            embedding_map = {}
-            for relationship_type, embedding in zip(
-                unique_relationship_types, relationship_type_embeddings
-            ):
-                edge_vector = np.array(embedding)
-
-                # Calculate cosine similarity
-                similarity = np.dot(query_vector, edge_vector) / (
-                    np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
-                )
-                distance = 1 - similarity
-
-                # Round the distance to 4 decimal places and store it
-                embedding_map[relationship_type] = round(distance, 4)
+            edge_distances = await vector_engine.get_distance_from_collection_elements(
+                "edge_type_relationship_name", query_text=query
+            )
 
-            # Step 4: Assign precomputed distances to edges
+            embedding_map = {result.payload["text"]: result.score for result in edge_distances}
 
             for edge in self.edges:
                 relationship_type = edge.attributes.get("relationship_type")
                 if not relationship_type or relationship_type not in embedding_map:
                     print(f"Edge {edge} has an unknown or missing relationship type.")
                     continue
 
-                # Assign the precomputed distance
                 edge.attributes["vector_distance"] = embedding_map[relationship_type]
 
         except Exception as ex:
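The deleted path embedded every distinct relationship_type at query time and computed cosine distances by hand; the new path reads scores precomputed in the vector engine's "edge_type_relationship_name" collection. For reference, a self-contained sketch of the arithmetic the removed block performed (names are illustrative):

```
import numpy as np

def cosine_distance(query_vector: np.ndarray, edge_vector: np.ndarray) -> float:
    # 1 - cosine similarity, rounded to 4 decimals as in the removed code.
    similarity = np.dot(query_vector, edge_vector) / (
        np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
    )
    return round(1 - similarity, 4)

# Identical directions give distance 0.0; orthogonal directions give 1.0.
assert cosine_distance(np.array([1.0, 0.0]), np.array([2.0, 0.0])) == 0.0
assert cosine_distance(np.array([1.0, 0.0]), np.array([0.0, 3.0])) == 1.0
```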
25 changes: 1 addition & 24 deletions cognee/modules/retrieval/brute_force_triplet_search.py
@@ -62,24 +62,6 @@ async def brute_force_triplet_search(
     return retrieved_results
 
 
-def delete_duplicated_vector_db_elements(
-    collections, results
-):  #:TODO: This is just for now to fix vector db duplicates
-    results_dict = {}
-    for collection, results in zip(collections, results):
-        seen_ids = set()
-        unique_results = []
-        for result in results:
-            if result.id not in seen_ids:
-                unique_results.append(result)
-                seen_ids.add(result.id)
-            else:
-                print(f"Duplicate found in collection '{collection}': {result.id}")
-        results_dict[collection] = unique_results
-
-    return results_dict
-
-
 async def brute_force_search(
     query: str, user: User, top_k: int, collections: List[str] = None
 ) -> list:
@@ -125,10 +107,7 @@ async def brute_force_search(
             ]
         )
 
-        ############################################# :TODO: Change when vector db does not contain duplicates
-        node_distances = delete_duplicated_vector_db_elements(collections, results)
-        # node_distances = {collection: result for collection, result in zip(collections, results)}
-        ##############################################
+        node_distances = {collection: result for collection, result in zip(collections, results)}
 
         memory_fragment = CogneeGraph()
 
@@ -140,14 +119,12 @@ async def brute_force_search(
 
         await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
 
-        #:TODO: Change when vectordb contains edge embeddings
         await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)
 
         results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
 
        send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
 
-        #:TODO: Once we have Edge pydantic models we should retrieve the exact edge and node objects from graph db
         return results
 
     except Exception as e:
4 changes: 2 additions & 2 deletions cognee/modules/users/methods/get_default_user.py
@@ -1,4 +1,4 @@
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import selectinload
 from sqlalchemy.future import select
 from cognee.modules.users.models import User
 from cognee.infrastructure.databases.relational import get_relational_engine
@@ -11,7 +11,7 @@ async def get_default_user():
     async with db_engine.get_async_session() as session:
         query = (
             select(User)
-            .options(joinedload(User.groups))
+            .options(selectinload(User.groups))
             .where(User.email == "default_user@example.com")
         )
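joinedload fetches the user and its groups in a single JOINed query, which duplicates the parent row per group and, in SQLAlchemy 2.0, requires calling .unique() on the result; selectinload issues a second SELECT ... WHERE ... IN query instead, with no row duplication, which is the usual recommendation for collection relationships on an async session. An illustrative sketch of the two query shapes (not repo code):

```
from sqlalchemy import select
from sqlalchemy.orm import joinedload, selectinload

from cognee.modules.users.models import User

# Single JOINed query; each group duplicates its parent User row, so
# SQLAlchemy 2.0 makes you call .unique() on the result before .scalars().
joined_query = select(User).options(joinedload(User.groups))

# Two queries: the Users first, then "SELECT ... WHERE user_id IN (...)"
# for the groups. No duplicated rows and no .unique() call needed, which
# is simpler with the AsyncSession used by get_default_user().
select_in_query = select(User).options(selectinload(User.groups))
```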
14 changes: 9 additions & 5 deletions cognee/shared/utils.py
@@ -451,16 +451,20 @@ def graph_to_tuple(graph):
 
 
 def setup_logging(log_level=logging.INFO):
-    """This method sets up the logging configuration."""
+    """Sets up the logging configuration."""
     formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n")
 
     stream_handler = logging.StreamHandler(sys.stdout)
     stream_handler.setFormatter(formatter)
     stream_handler.setLevel(log_level)
 
-    logging.basicConfig(
-        level=log_level,
-        handlers=[stream_handler],
-    )
+    root_logger = logging.getLogger()
+
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+
+    root_logger.addHandler(stream_handler)
+    root_logger.setLevel(log_level)
 
 
 # ---------------- Example Usage ----------------
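The old logging.basicConfig call is a no-op whenever the root logger already has handlers, so a repeated setup_logging() call (common in notebooks and tests) silently kept the first handler and level; clearing the root logger's handlers makes the call idempotent. A quick sketch of the behavior difference, assuming the new setup_logging above:

```
import logging

setup_logging(logging.INFO)
logging.getLogger(__name__).info("visible: root level is INFO")

# With the old basicConfig-based version this second call changed nothing;
# now it replaces the handler and raises the root level to ERROR.
setup_logging(logging.ERROR)
logging.getLogger(__name__).info("suppressed: root level is now ERROR")
logging.getLogger(__name__).error("visible, and emitted exactly once")
```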
2 changes: 1 addition & 1 deletion examples/python/dynamic_steps_example.py
@@ -192,7 +192,7 @@ async def main(enable_steps):
 
 
 if __name__ == "__main__":
-    setup_logging(logging.INFO)
+    setup_logging(logging.ERROR)
 
     rebuild_kg = True
     retrieve = True