Skip to content

Commit

Permalink
[TST]: update test_persist to create 1+ collections (#2933)
Browse files Browse the repository at this point in the history
  • Loading branch information
codetheweb authored Oct 22, 2024
1 parent a2e6be2 commit 044556e
Showing 1 changed file with 63 additions and 33 deletions.
96 changes: 63 additions & 33 deletions chromadb/test/property/test_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from multiprocessing.connection import Connection
import multiprocessing.context
import time
from typing import Generator, Callable
from typing import Generator, Callable, List, Tuple
from uuid import UUID
from hypothesis import given
import hypothesis.strategies as st
Expand Down Expand Up @@ -66,44 +66,70 @@ def settings(request: pytest.FixtureRequest) -> Generator[Settings, None, None]:
with_hnsw_params=True,
with_persistent_hnsw_params=st.just(True),
# Makes it more likely to find persist-related bugs (by default these are set to 2000).
# Lower values make it more likely that a test will trigger a persist to disk.
max_hnsw_batch_size=10,
max_hnsw_sync_threshold=10,
),
key="coll",
)


@st.composite
def collection_and_recordset_strategy(
    draw: st.DrawFn,
) -> Tuple[strategies.Collection, strategies.RecordSet]:
    """Draw a collection and a record set generated for that same collection.

    Args:
        draw: The draw function supplied by ``st.composite``.

    Returns:
        A ``(collection, recordset)`` pair, where the record set was drawn
        from ``strategies.recordsets`` pinned to the drawn collection.
    """
    # The batch size and sync threshold are capped at 10 (by default these
    # are set to 2000). Lower values make it more likely that a test will
    # trigger a persist to disk, and so more likely to find persist-related
    # bugs.
    collection_source = strategies.collections(
        with_hnsw_params=True,
        with_persistent_hnsw_params=st.just(True),
        max_hnsw_batch_size=10,
        max_hnsw_sync_threshold=10,
    )
    drawn_collection = draw(collection_source)

    # Pin the record set strategy to the collection that was just drawn so
    # the two halves of the returned pair always belong together.
    drawn_records = draw(strategies.recordsets(st.just(drawn_collection)))
    return drawn_collection, drawn_records


@given(
collection_strategy=collection_st,
embeddings_strategy=strategies.recordsets(collection_st),
collection_and_recordset_strategies=st.lists(
collection_and_recordset_strategy(),
min_size=1,
unique_by=(lambda x: x[0].name, lambda x: x[0].name),
)
)
def test_persist(
settings: Settings,
collection_strategy: strategies.Collection,
embeddings_strategy: strategies.RecordSet,
collection_and_recordset_strategies: List[
Tuple[strategies.Collection, strategies.RecordSet]
],
) -> None:
system_1 = System(settings)
system_1.start()
client_1 = ClientCreator.from_system(system_1)

client_1.reset()
coll = client_1.create_collection(
name=collection_strategy.name,
metadata=collection_strategy.metadata, # type: ignore[arg-type]
embedding_function=collection_strategy.embedding_function,
)
for (
collection_strategy,
recordset_strategy,
) in collection_and_recordset_strategies:
coll = client_1.create_collection(
name=collection_strategy.name,
metadata=collection_strategy.metadata, # type: ignore[arg-type]
embedding_function=collection_strategy.embedding_function,
)

coll.add(**embeddings_strategy) # type: ignore[arg-type]
coll.add(**recordset_strategy) # type: ignore[arg-type]

invariants.count(coll, embeddings_strategy)
invariants.metadatas_match(coll, embeddings_strategy)
invariants.documents_match(coll, embeddings_strategy)
invariants.ids_match(coll, embeddings_strategy)
invariants.ann_accuracy(
coll,
embeddings_strategy,
embedding_function=collection_strategy.embedding_function,
)
invariants.count(coll, recordset_strategy)
invariants.metadatas_match(coll, recordset_strategy)
invariants.documents_match(coll, recordset_strategy)
invariants.ids_match(coll, recordset_strategy)
invariants.ann_accuracy(
coll,
recordset_strategy,
embedding_function=collection_strategy.embedding_function,
)

system_1.stop()
del client_1
Expand All @@ -113,19 +139,23 @@ def test_persist(
system_2.start()
client_2 = ClientCreator.from_system(system_2)

coll = client_2.get_collection(
name=collection_strategy.name,
embedding_function=collection_strategy.embedding_function,
)
invariants.count(coll, embeddings_strategy)
invariants.metadatas_match(coll, embeddings_strategy)
invariants.documents_match(coll, embeddings_strategy)
invariants.ids_match(coll, embeddings_strategy)
invariants.ann_accuracy(
coll,
embeddings_strategy,
embedding_function=collection_strategy.embedding_function,
)
for (
collection_strategy,
recordset_strategy,
) in collection_and_recordset_strategies:
coll = client_2.get_collection(
name=collection_strategy.name,
embedding_function=collection_strategy.embedding_function,
)
invariants.count(coll, recordset_strategy)
invariants.metadatas_match(coll, recordset_strategy)
invariants.documents_match(coll, recordset_strategy)
invariants.ids_match(coll, recordset_strategy)
invariants.ann_accuracy(
coll,
recordset_strategy,
embedding_function=collection_strategy.embedding_function,
)

system_2.stop()
del client_2
Expand Down

0 comments on commit 044556e

Please sign in to comment.