Format with Ruff 0.9.0 #425

Merged: 1 commit, merged on Jan 10, 2025
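This PR reformats the codebase with Ruff 0.9.0. As far as I can tell from the diff, the changes fall into three of the style updates stabilized in that release: implicitly concatenated strings are joined when they fit on one line, quotes inside f-string replacement fields are normalized, and assert statements whose message forces a line break now parenthesize the message instead of the condition. A self-contained sketch of the assert change, mirroring the pattern repeated in the test files below (the data here are stand-ins, not this project's fixtures):

    # Stand-in data, only to make the before/after shapes runnable.
    ground_truth = {"word_count": 19}

    class ParagraphData:
        word_count = 19

    paragraph_data = ParagraphData()

    # Old wrapping (pre-0.9.0): the condition is parenthesized and the message
    # trails the closing parenthesis.
    assert (
        ground_truth["word_count"] == paragraph_data.word_count
    ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'

    # New wrapping (0.9.0): the condition stays on the assert line and the
    # message is parenthesized instead.
    assert ground_truth["word_count"] == paragraph_data.word_count, (
        f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
    )

Both forms are equivalent at runtime; only the line-wrapping strategy differs.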
@@ -493,7 +493,7 @@ async def get_filtered_graph_data(self, attribute_filters):

query_edges = f"""
MATCH (n)-[r]->(m)
- WHERE {where_clause} AND {where_clause.replace('n.', 'm.')}
+ WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
"""
result_edges = await self.query(query_edges)
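The one-line change above swaps the quotes inside the f-string replacement field from single to double. My understanding is that this comes from the f-string formatting stabilized in Ruff 0.9.0, which formats expressions inside f-strings and normalizes their inner quotes where that is safe. A minimal sketch showing the rewrite is behavior-preserving (the names below are stand-ins, not this project's adapter code):

    # Stand-in WHERE clause; the real query lives in a graph adapter.
    where_clause = "n.name IS NOT NULL"
    old_style = f"""WHERE {where_clause} AND {where_clause.replace('n.', 'm.')}"""
    new_style = f"""WHERE {where_clause} AND {where_clause.replace("n.", "m.")}"""
    assert old_style == new_style  # the rewrite is purely cosmetic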
2 changes: 1 addition & 1 deletion cognee/modules/retrieval/brute_force_triplet_search.py
@@ -43,7 +43,7 @@ def filter_attributes(obj, attributes):
edge_info = {key: value for key, value in edge_attributes.items() if value is not None}

# Create the formatted triplet
triplet = f"Node1: {node1_info}\n" f"Edge: {edge_info}\n" f"Node2: {node2_info}\n\n\n"
triplet = f"Node1: {node1_info}\nEdge: {edge_info}\nNode2: {node2_info}\n\n\n"
triplets.append(triplet)

return "".join(triplets)
3 changes: 1 addition & 2 deletions cognee/modules/retrieval/description_to_codepart_search.py
@@ -75,8 +75,7 @@ async def code_description_to_code_part(

llm_client = get_llm_client()
context_from_documents = await llm_client.acreate_structured_output(
text_input=f"The retrieved context from documents"
f" is {concatenated_descriptions}.",
text_input=f"The retrieved context from documents is {concatenated_descriptions}.",
system_prompt="You are a Senior Software Engineer, summarize the context from documents"
f" in a way that it is gonna be provided next to codeparts as context"
f" while trying to solve this github issue connected to the project: {query}]",
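The two files above show the other recurring rewrite: implicitly concatenated string literals that fit within the line length are joined into a single literal, which I believe is another style change stabilized in Ruff 0.9.0. A small runnable sketch of why this is safe (the value below is hypothetical):

    # Hypothetical value, only to make the equivalence concrete.
    concatenated_descriptions = "summary of retrieved documents"
    split_form = f"The retrieved context from documents" f" is {concatenated_descriptions}."
    joined_form = f"The retrieved context from documents is {concatenated_descriptions}."
    assert split_form == joined_form  # adjacent literals are concatenated at compile time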
18 changes: 9 additions & 9 deletions cognee/tests/integration/documents/AudioDocument_test.py
@@ -36,12 +36,12 @@ def test_AudioDocument():
for ground_truth, paragraph_data in zip(
GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker")
):
- assert (
- ground_truth["word_count"] == paragraph_data.word_count
- ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
- assert ground_truth["len_text"] == len(
- paragraph_data.text
- ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
- assert (
- ground_truth["cut_type"] == paragraph_data.cut_type
- ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ assert ground_truth["word_count"] == paragraph_data.word_count, (
+ f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+ )
+ assert ground_truth["len_text"] == len(paragraph_data.text), (
+ f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+ )
+ assert ground_truth["cut_type"] == paragraph_data.cut_type, (
+ f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ )
18 changes: 9 additions & 9 deletions cognee/tests/integration/documents/ImageDocument_test.py
@@ -25,12 +25,12 @@ def test_ImageDocument():
for ground_truth, paragraph_data in zip(
GROUND_TRUTH, document.read(chunk_size=64, chunker="text_chunker")
):
- assert (
- ground_truth["word_count"] == paragraph_data.word_count
- ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
- assert ground_truth["len_text"] == len(
- paragraph_data.text
- ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
- assert (
- ground_truth["cut_type"] == paragraph_data.cut_type
- ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ assert ground_truth["word_count"] == paragraph_data.word_count, (
+ f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+ )
+ assert ground_truth["len_text"] == len(paragraph_data.text), (
+ f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+ )
+ assert ground_truth["cut_type"] == paragraph_data.cut_type, (
+ f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ )
18 changes: 9 additions & 9 deletions cognee/tests/integration/documents/PdfDocument_test.py
@@ -27,12 +27,12 @@ def test_PdfDocument():
for ground_truth, paragraph_data in zip(
GROUND_TRUTH, document.read(chunk_size=1024, chunker="text_chunker")
):
- assert (
- ground_truth["word_count"] == paragraph_data.word_count
- ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
- assert ground_truth["len_text"] == len(
- paragraph_data.text
- ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
- assert (
- ground_truth["cut_type"] == paragraph_data.cut_type
- ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ assert ground_truth["word_count"] == paragraph_data.word_count, (
+ f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+ )
+ assert ground_truth["len_text"] == len(paragraph_data.text), (
+ f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+ )
+ assert ground_truth["cut_type"] == paragraph_data.cut_type, (
+ f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ )
18 changes: 9 additions & 9 deletions cognee/tests/integration/documents/TextDocument_test.py
@@ -39,12 +39,12 @@ def test_TextDocument(input_file, chunk_size):
for ground_truth, paragraph_data in zip(
GROUND_TRUTH[input_file], document.read(chunk_size=chunk_size, chunker="text_chunker")
):
- assert (
- ground_truth["word_count"] == paragraph_data.word_count
- ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
- assert ground_truth["len_text"] == len(
- paragraph_data.text
- ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
- assert (
- ground_truth["cut_type"] == paragraph_data.cut_type
- ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ assert ground_truth["word_count"] == paragraph_data.word_count, (
+ f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
+ )
+ assert ground_truth["len_text"] == len(paragraph_data.text), (
+ f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
+ )
+ assert ground_truth["cut_type"] == paragraph_data.cut_type, (
+ f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
+ )
30 changes: 15 additions & 15 deletions cognee/tests/integration/documents/UnstructuredDocument_test.py
@@ -71,32 +71,32 @@ def test_UnstructuredDocument():
for paragraph_data in pptx_document.read(chunk_size=1024, chunker="text_chunker"):
assert 19 == paragraph_data.word_count, f" 19 != {paragraph_data.word_count = }"
assert 104 == len(paragraph_data.text), f" 104 != {len(paragraph_data.text) = }"
- assert (
- "sentence_cut" == paragraph_data.cut_type
- ), f" sentence_cut != {paragraph_data.cut_type = }"
+ assert "sentence_cut" == paragraph_data.cut_type, (
+ f" sentence_cut != {paragraph_data.cut_type = }"
+ )

# Test DOCX
for paragraph_data in docx_document.read(chunk_size=1024, chunker="text_chunker"):
assert 16 == paragraph_data.word_count, f" 16 != {paragraph_data.word_count = }"
assert 145 == len(paragraph_data.text), f" 145 != {len(paragraph_data.text) = }"
- assert (
- "sentence_end" == paragraph_data.cut_type
- ), f" sentence_end != {paragraph_data.cut_type = }"
+ assert "sentence_end" == paragraph_data.cut_type, (
+ f" sentence_end != {paragraph_data.cut_type = }"
+ )

# TEST CSV
for paragraph_data in csv_document.read(chunk_size=1024, chunker="text_chunker"):
assert 15 == paragraph_data.word_count, f" 15 != {paragraph_data.word_count = }"
- assert (
- "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text
- ), f"Read text doesn't match expected text: {paragraph_data.text}"
- assert (
- "sentence_cut" == paragraph_data.cut_type
- ), f" sentence_cut != {paragraph_data.cut_type = }"
+ assert "A A A A A A A A A,A A A A A A,A A" == paragraph_data.text, (
+ f"Read text doesn't match expected text: {paragraph_data.text}"
+ )
+ assert "sentence_cut" == paragraph_data.cut_type, (
+ f" sentence_cut != {paragraph_data.cut_type = }"
+ )

# Test XLSX
for paragraph_data in xlsx_document.read(chunk_size=1024, chunker="text_chunker"):
assert 36 == paragraph_data.word_count, f" 36 != {paragraph_data.word_count = }"
assert 171 == len(paragraph_data.text), f" 171 != {len(paragraph_data.text) = }"
- assert (
- "sentence_cut" == paragraph_data.cut_type
- ), f" sentence_cut != {paragraph_data.cut_type = }"
+ assert "sentence_cut" == paragraph_data.cut_type, (
+ f" sentence_cut != {paragraph_data.cut_type = }"
+ )
12 changes: 6 additions & 6 deletions cognee/tests/test_deduplication.py
@@ -30,9 +30,9 @@ async def test_deduplication():

result = await relational_engine.get_all_data_from_table("data")
assert len(result) == 1, "More than one data entity was found."
- assert (
- result[0]["name"] == "Natural_language_processing_copy"
- ), "Result name does not match expected value."
+ assert result[0]["name"] == "Natural_language_processing_copy", (
+ "Result name does not match expected value."
+ )

result = await relational_engine.get_all_data_from_table("datasets")
assert len(result) == 2, "Unexpected number of datasets found."
@@ -61,9 +61,9 @@ async def test_deduplication():

result = await relational_engine.get_all_data_from_table("data")
assert len(result) == 1, "More than one data entity was found."
- assert (
- hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"]
- ), "Content hash is not a part of file name."
+ assert hashlib.md5(text.encode("utf-8")).hexdigest() in result[0]["name"], (
+ "Content hash is not a part of file name."
+ )

await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
6 changes: 3 additions & 3 deletions cognee/tests/test_falkordb.py
@@ -85,9 +85,9 @@ async def main():

from cognee.infrastructure.databases.relational import get_relational_engine

- assert not os.path.exists(
- get_relational_engine().db_path
- ), "SQLite relational database is not empty"
+ assert not os.path.exists(get_relational_engine().db_path), (
+ "SQLite relational database is not empty"
+ )

from cognee.infrastructure.databases.graph import get_graph_config

6 changes: 3 additions & 3 deletions cognee/tests/test_library.py
@@ -82,9 +82,9 @@ async def main():

from cognee.infrastructure.databases.relational import get_relational_engine

- assert not os.path.exists(
- get_relational_engine().db_path
- ), "SQLite relational database is not empty"
+ assert not os.path.exists(get_relational_engine().db_path), (
+ "SQLite relational database is not empty"
+ )

from cognee.infrastructure.databases.graph import get_graph_config

36 changes: 18 additions & 18 deletions cognee/tests/test_pgvector.py
@@ -24,28 +24,28 @@ async def test_local_file_deletion(data_text, file_location):
data_hash = hashlib.md5(encoded_text).hexdigest()
# Get data entry from database based on hash contents
data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
- assert os.path.isfile(
- data.raw_data_location
- ), f"Data location doesn't exist: {data.raw_data_location}"
+ assert os.path.isfile(data.raw_data_location), (
+ f"Data location doesn't exist: {data.raw_data_location}"
+ )
# Test deletion of data along with local files created by cognee
await engine.delete_data_entity(data.id)
- assert not os.path.exists(
- data.raw_data_location
- ), f"Data location still exists after deletion: {data.raw_data_location}"
+ assert not os.path.exists(data.raw_data_location), (
+ f"Data location still exists after deletion: {data.raw_data_location}"
+ )

async with engine.get_async_session() as session:
# Get data entry from database based on file path
data = (
await session.scalars(select(Data).where(Data.raw_data_location == file_location))
).one()
- assert os.path.isfile(
- data.raw_data_location
- ), f"Data location doesn't exist: {data.raw_data_location}"
+ assert os.path.isfile(data.raw_data_location), (
+ f"Data location doesn't exist: {data.raw_data_location}"
+ )
# Test local files not created by cognee won't get deleted
await engine.delete_data_entity(data.id)
- assert os.path.exists(
- data.raw_data_location
- ), f"Data location doesn't exists: {data.raw_data_location}"
+ assert os.path.exists(data.raw_data_location), (
+ f"Data location doesn't exists: {data.raw_data_location}"
+ )


async def test_getting_of_documents(dataset_name_1):
@@ -54,16 +54,16 @@ async def test_getting_of_documents(dataset_name_1):

user = await get_default_user()
document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
- assert (
- len(document_ids) == 1
- ), f"Number of expected documents doesn't match {len(document_ids)} != 1"
+ assert len(document_ids) == 1, (
+ f"Number of expected documents doesn't match {len(document_ids)} != 1"
+ )

# Test getting of documents for search when no dataset is provided
user = await get_default_user()
document_ids = await get_document_ids_for_user(user.id)
- assert (
- len(document_ids) == 2
- ), f"Number of expected documents doesn't match {len(document_ids)} != 2"
+ assert len(document_ids) == 2, (
+ f"Number of expected documents doesn't match {len(document_ids)} != 2"
+ )


async def main():
18 changes: 9 additions & 9 deletions cognee/tests/unit/processing/chunks/chunk_by_paragraph_2_test.py
@@ -17,9 +17,9 @@
def test_chunk_by_paragraph_isomorphism(input_text, paragraph_length, batch_paragraphs):
chunks = chunk_by_paragraph(input_text, paragraph_length, batch_paragraphs)
reconstructed_text = "".join([chunk["text"] for chunk in chunks])
- assert (
- reconstructed_text == input_text
- ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ assert reconstructed_text == input_text, (
+ f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ )


@pytest.mark.parametrize(
@@ -36,9 +36,9 @@ def test_paragraph_chunk_length(input_text, paragraph_length, batch_paragraphs):
chunk_lengths = np.array([len(list(chunk_by_word(chunk["text"]))) for chunk in chunks])

larger_chunks = chunk_lengths[chunk_lengths > paragraph_length]
- assert np.all(
- chunk_lengths <= paragraph_length
- ), f"{paragraph_length = }: {larger_chunks} are too large"
+ assert np.all(chunk_lengths <= paragraph_length), (
+ f"{paragraph_length = }: {larger_chunks} are too large"
+ )


@pytest.mark.parametrize(
@@ -50,6 +50,6 @@ def test_chunk_by_paragraph_chunk_numbering(input_text, paragraph_length, batch_
data=input_text, paragraph_length=paragraph_length, batch_paragraphs=batch_paragraphs
)
chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks])
- assert np.all(
- chunk_indices == np.arange(len(chunk_indices))
- ), f"{chunk_indices = } are not monotonically increasing"
+ assert np.all(chunk_indices == np.arange(len(chunk_indices))), (
+ f"{chunk_indices = } are not monotonically increasing"
+ )
@@ -58,9 +58,9 @@ def run_chunking_test(test_text, expected_chunks):

for expected_chunks_item, chunk in zip(expected_chunks, chunks):
for key in ["text", "word_count", "cut_type"]:
- assert (
- chunk[key] == expected_chunks_item[key]
- ), f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }"
+ assert chunk[key] == expected_chunks_item[key], (
+ f"{key = }: {chunk[key] = } != {expected_chunks_item[key] = }"
+ )


def test_chunking_whole_text():
12 changes: 6 additions & 6 deletions cognee/tests/unit/processing/chunks/chunk_by_sentence_test.py
@@ -16,9 +16,9 @@
def test_chunk_by_sentence_isomorphism(input_text, maximum_length):
chunks = chunk_by_sentence(input_text, maximum_length)
reconstructed_text = "".join([chunk[1] for chunk in chunks])
- assert (
- reconstructed_text == input_text
- ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ assert reconstructed_text == input_text, (
+ f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ )


@pytest.mark.parametrize(
@@ -36,6 +36,6 @@ def test_paragraph_chunk_length(input_text, maximum_length):
chunk_lengths = np.array([len(list(chunk_by_word(chunk[1]))) for chunk in chunks])

larger_chunks = chunk_lengths[chunk_lengths > maximum_length]
- assert np.all(
- chunk_lengths <= maximum_length
- ), f"{maximum_length = }: {larger_chunks} are too large"
+ assert np.all(chunk_lengths <= maximum_length), (
+ f"{maximum_length = }: {larger_chunks} are too large"
+ )
6 changes: 3 additions & 3 deletions cognee/tests/unit/processing/chunks/chunk_by_word_test.py
@@ -17,9 +17,9 @@
def test_chunk_by_word_isomorphism(input_text):
chunks = chunk_by_word(input_text)
reconstructed_text = "".join([chunk[0] for chunk in chunks])
- assert (
- reconstructed_text == input_text
- ), f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ assert reconstructed_text == input_text, (
+ f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+ )


@pytest.mark.parametrize(