From 8788a2f4f41a4c49cfbe8bb49a5c8893fd8959e6 Mon Sep 17 00:00:00 2001 From: RafaelJohn9 Date: Tue, 22 Oct 2024 10:58:54 +0300 Subject: [PATCH 1/2] Test: added test coverage in Doc2Vec Signed-off-by: RafaelJohn9 --- .../Doc2VecVectorStore_unit_test.py | 74 +++++++++++++++---- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py b/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py index a3af5be10..d1691a2a4 100644 --- a/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py +++ b/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py @@ -2,29 +2,71 @@ from swarmauri.documents.concrete.Document import Document from swarmauri.vector_stores.concrete.Doc2VecVectorStore import Doc2VecVectorStore + @pytest.mark.unit def test_ubc_resource(): - vs = Doc2VecVectorStore() - assert vs.resource == 'VectorStore' - assert vs.embedder.resource == 'Embedding' + vs = Doc2VecVectorStore() + assert vs.resource == "VectorStore" + assert vs.embedder.resource == "Embedding" + @pytest.mark.unit def test_ubc_type(): - vs = Doc2VecVectorStore() - assert vs.type == 'Doc2VecVectorStore' + vs = Doc2VecVectorStore() + assert vs.type == "Doc2VecVectorStore" + @pytest.mark.unit def test_serialization(): - vs = Doc2VecVectorStore() - assert vs.id == Doc2VecVectorStore.model_validate_json(vs.model_dump_json()).id + vs = Doc2VecVectorStore() + assert vs.id == Doc2VecVectorStore.model_validate_json(vs.model_dump_json()).id + + +@pytest.mark.unit +def test_top_k(): + vs = Doc2VecVectorStore() + documents = [ + Document(content="test"), + Document(content="test1"), + Document(content="test2"), + Document(content="test3"), + ] + + vs.add_documents(documents) + assert len(vs.retrieve(query="test", top_k=2)) == 2 + + +@pytest.mark.unit +def test_adding_more_doc(): + vs = Doc2VecVectorStore() + documents_batch_1 = [ + Document(content="test"), + Document(content="test1"), 
+ Document(content="test2"), + Document(content="test3"), + ] + documents_batch_2 = [ + Document(content="test4"), + Document(content="test5"), + Document(content="test6"), + Document(content="test7"), + ] + doc_count = len(documents_batch_1) + len(documents_batch_2) + + vs.add_documents(documents_batch_1) + vs.add_documents(documents_batch_2) + assert len(vs.retrieve(query="test", top_k=doc_count)) == doc_count + @pytest.mark.unit -def top_k_test(): - vs = Doc2VecVectorStore() - documents = [Document(content="test"), - Document(content='test1'), - Document(content='test2'), - Document(content='test3')] - - vs.add_documents(documents) - assert len(vs.retrieve(query='test', top_k=2)) == 2 +def test_oov(): + """Test for Out Of Vocabulary (OOV) words""" + vs = Doc2VecVectorStore() + documents = [ + Document(content="test"), + Document(content="test1"), + Document(content="test2"), + Document(content="test3"), + ] + vs.add_documents(documents) + assert len(vs.retrieve(query="what is test 4", top_k=2)) == 2 From d838893d3f8469f0c63a2f5fc605404a75013d98 Mon Sep 17 00:00:00 2001 From: RafaelJohn9 Date: Tue, 22 Oct 2024 11:06:52 +0300 Subject: [PATCH 2/2] swarm - Fix: fixed the test to have correct test Signed-off-by: RafaelJohn9 --- .../unit/vector_stores/Doc2VecVectorStore_unit_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py b/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py index d1691a2a4..7afcd7097 100644 --- a/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py +++ b/pkgs/swarmauri/tests/unit/vector_stores/Doc2VecVectorStore_unit_test.py @@ -46,10 +46,10 @@ def test_adding_more_doc(): Document(content="test3"), ] documents_batch_2 = [ - Document(content="test4"), - Document(content="test5"), - Document(content="test6"), - Document(content="test7"), + Document(content="This is a test. 
Test number 4"), + Document(content="This is a test. Test number 5"), + Document(content="This is a test. Test number 6"), + Document(content="This is a test. Test number 7"), ] doc_count = len(documents_batch_1) + len(documents_batch_2)