From 6164212af05372624ce4215246f26728edcaacb2 Mon Sep 17 00:00:00 2001 From: Filip Haltmayer Date: Mon, 10 Jul 2023 21:51:14 -0700 Subject: [PATCH 1/7] Adding Milvus memory connector Signed-off-by: Filip Haltmayer --- python/poetry.lock | 205 +++++++- python/pyproject.toml | 4 + .../connectors/memory/milvus/__init__.py | 5 + .../memory/milvus/milvus_memory_store.py | 469 ++++++++++++++++++ .../connectors/memory/test_milvus.py | 216 ++++++++ 5 files changed, 898 insertions(+), 1 deletion(-) create mode 100644 python/semantic_kernel/connectors/memory/milvus/__init__.py create mode 100644 python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py create mode 100644 python/tests/integration/connectors/memory/test_milvus.py diff --git a/python/poetry.lock b/python/poetry.lock index a29c4d7b0c45..6a5aab67742d 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -857,6 +857,27 @@ files = [ {file = "duckdb-0.8.1.tar.gz", hash = "sha256:a54d37f4abc2afc4f92314aaa56ecf215a411f40af4bffe1e86bd25e62aceee9"}, ] +[[package]] +name = "environs" +version = "9.5.0" +description = "simplified environment variable parsing" +optional = false +python-versions = ">=3.6" +files = [ + {file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"}, + {file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"}, +] + +[package.dependencies] +marshmallow = ">=3.0.0" +python-dotenv = "*" + +[package.extras] +dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"] +django = ["dj-database-url", "dj-email-url", "django-cache-url"] +lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"] +tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"] + [[package]] name = "exceptiongroup" version = "1.1.1" @@ -1058,6 +1079,63 @@ files = [ {file = "graphlib_backport-1.0.3.tar.gz", hash = "sha256:7bb8fc7757b8ae4e6d8000a26cd49e9232aaa9a3aa57edb478474b8424bfaae2"}, ] +[[package]] +name = "grpcio" +version = "1.56.0" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpcio-1.56.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:fb34ace11419f1ae321c36ccaa18d81cd3f20728cd191250be42949d6845bb2d"}, + {file = "grpcio-1.56.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:008767c0aed4899e657b50f2e0beacbabccab51359eba547f860e7c55f2be6ba"}, + {file = "grpcio-1.56.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:17f47aeb9be0da5337f9ff33ebb8795899021e6c0741ee68bd69774a7804ca86"}, + {file = "grpcio-1.56.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43c50d810cc26349b093bf2cfe86756ab3e9aba3e7e681d360930c1268e1399a"}, + {file = "grpcio-1.56.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187b8f71bad7d41eea15e0c9812aaa2b87adfb343895fffb704fb040ca731863"}, + {file = "grpcio-1.56.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:881575f240eb5db72ddca4dc5602898c29bc082e0d94599bf20588fb7d1ee6a0"}, + {file = "grpcio-1.56.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c243b158dd7585021d16c50498c4b2ec0a64a6119967440c5ff2d8c89e72330e"}, + {file = "grpcio-1.56.0-cp310-cp310-win32.whl", hash = "sha256:8b3b2c7b5feef90bc9a5fa1c7f97637e55ec3e76460c6d16c3013952ee479cd9"}, + {file = "grpcio-1.56.0-cp310-cp310-win_amd64.whl", hash = "sha256:03a80451530fd3b8b155e0c4480434f6be669daf7ecba56f73ef98f94222ee01"}, + {file = "grpcio-1.56.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:64bd3abcf9fb4a9fa4ede8d0d34686314a7075f62a1502217b227991d9ca4245"}, + {file = "grpcio-1.56.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:fdc3a895791af4addbb826808d4c9c35917c59bb5c430d729f44224e51c92d61"}, + {file = "grpcio-1.56.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:4f84a6fd4482e5fe73b297d4874b62a535bc75dc6aec8e9fe0dc88106cd40397"}, + {file = "grpcio-1.56.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14e70b4dda3183abea94c72d41d5930c333b21f8561c1904a372d80370592ef3"}, + {file = "grpcio-1.56.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b5ce42a5ebe3e04796246ba50357f1813c44a6efe17a37f8dc7a5c470377312"}, + {file = "grpcio-1.56.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8219f17baf069fe8e42bd8ca0b312b875595e43a70cabf397be4fda488e2f27d"}, + {file = "grpcio-1.56.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:defdd14b518e6e468466f799aaa69db0355bca8d3a5ea75fb912d28ba6f8af31"}, + {file = "grpcio-1.56.0-cp311-cp311-win32.whl", hash = "sha256:50f4daa698835accbbcc60e61e0bc29636c0156ddcafb3891c987e533a0031ba"}, + {file = "grpcio-1.56.0-cp311-cp311-win_amd64.whl", hash = "sha256:59c4e606993a47146fbeaf304b9e78c447f5b9ee5641cae013028c4cca784617"}, + {file = "grpcio-1.56.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:b1f4b6f25a87d80b28dd6d02e87d63fe1577fe6d04a60a17454e3f8077a38279"}, + {file = "grpcio-1.56.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:c2148170e01d464d41011a878088444c13413264418b557f0bdcd1bf1b674a0e"}, + {file = "grpcio-1.56.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:0409de787ebbf08c9d2bca2bcc7762c1efe72eada164af78b50567a8dfc7253c"}, + {file = "grpcio-1.56.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66f0369d27f4c105cd21059d635860bb2ea81bd593061c45fb64875103f40e4a"}, + {file = "grpcio-1.56.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38fdf5bd0a1c754ce6bf9311a3c2c7ebe56e88b8763593316b69e0e9a56af1de"}, + {file = "grpcio-1.56.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:79d4c5911d12a7aa671e5eb40cbb50a830396525014d2d6f254ea2ba180ce637"}, + {file = "grpcio-1.56.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5d2fc471668a7222e213f86ef76933b18cdda6a51ea1322034478df8c6519959"}, + {file = "grpcio-1.56.0-cp37-cp37m-win_amd64.whl", hash = "sha256:991224fd485e088d3cb5e34366053691a4848a6b7112b8f5625a411305c26691"}, + {file = "grpcio-1.56.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:c6f36621aabecbaff3e70c4d1d924c76c8e6a7ffec60c331893640a4af0a8037"}, + {file = "grpcio-1.56.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:1eadd6de258901929223f422ffed7f8b310c0323324caf59227f9899ea1b1674"}, + {file = "grpcio-1.56.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:72836b5a1d4f508ffbcfe35033d027859cc737972f9dddbe33fb75d687421e2e"}, + {file = "grpcio-1.56.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f92a99ab0c7772fb6859bf2e4f44ad30088d18f7c67b83205297bfb229e0d2cf"}, + {file = "grpcio-1.56.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa08affbf672d051cd3da62303901aeb7042a2c188c03b2c2a2d346fc5e81c14"}, + {file = "grpcio-1.56.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2db108b4c8e29c145e95b0226973a66d73ae3e3e7fae00329294af4e27f1c42"}, + {file = "grpcio-1.56.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8674fdbd28266d8efbcddacf4ec3643f76fe6376f73283fd63a8374c14b0ef7c"}, + {file = "grpcio-1.56.0-cp38-cp38-win32.whl", hash = "sha256:bd55f743e654fb050c665968d7ec2c33f03578a4bbb163cfce38024775ff54cc"}, + {file = "grpcio-1.56.0-cp38-cp38-win_amd64.whl", hash = "sha256:c63bc5ac6c7e646c296fed9139097ae0f0e63f36f0864d7ce431cce61fe0118a"}, + {file = "grpcio-1.56.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c0bc9dda550785d23f4f025be614b7faa8d0293e10811f0f8536cf50435b7a30"}, + {file = "grpcio-1.56.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:d596408bab632ec7b947761e83ce6b3e7632e26b76d64c239ba66b554b7ee286"}, + {file = "grpcio-1.56.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:76b6e6e1ee9bda32e6e933efd61c512e9a9f377d7c580977f090d1a9c78cca44"}, + {file = "grpcio-1.56.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7beb84ebd0a3f732625124b73969d12b7350c5d9d64ddf81ae739bbc63d5b1ed"}, + {file = "grpcio-1.56.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83ec714bbbe9b9502177c842417fde39f7a267031e01fa3cd83f1ca49688f537"}, + {file = "grpcio-1.56.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4feee75565d1b5ab09cb3a5da672b84ca7f6dd80ee07a50f5537207a9af543a4"}, + {file = "grpcio-1.56.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b4638a796778329cc8e142e4f57c705adb286b3ba64e00b0fa91eeb919611be8"}, + {file = "grpcio-1.56.0-cp39-cp39-win32.whl", hash = "sha256:437af5a7673bca89c4bc0a993382200592d104dd7bf55eddcd141cef91f40bab"}, + {file = "grpcio-1.56.0-cp39-cp39-win_amd64.whl", hash = "sha256:4241a1c2c76e748023c834995cd916570e7180ee478969c2d79a60ce007bc837"}, + {file = "grpcio-1.56.0.tar.gz", hash = "sha256:4c08ee21b3d10315b8dc26f6c13917b20ed574cdbed2d2d80c53d5508fdcc0f2"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.56.0)"] + [[package]] name = "h11" version = "0.14.0" @@ -1533,6 +1611,26 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] +[[package]] +name = "marshmallow" +version = "3.19.0" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.7" +files = [ + {file = "marshmallow-3.19.0-py3-none-any.whl", hash = "sha256:93f0958568da045b0021ec6aeb7ac37c81bfcccbb9a0e7ed8559885070b3a19b"}, + {file = "marshmallow-3.19.0.tar.gz", hash = "sha256:90032c0fd650ce94b6ec6dc8dfeb0e3ff50c144586462c389b81a07205bedb78"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["flake8 (==5.0.4)", "flake8-bugbear (==22.10.25)", "mypy (==0.990)", "pre-commit (>=2.4,<3.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.12)", "autodocsumm (==0.2.9)", "sphinx (==5.3.0)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["flake8 (==5.0.4)", "flake8-bugbear (==22.10.25)", "mypy (==0.990)", "pre-commit (>=2.4,<3.0)"] +tests = ["pytest", "pytz", "simplejson"] + [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -1547,6 +1645,22 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "milvus" +version = "2.2.11" +description = "Embeded Milvus" +optional = false +python-versions = ">=3.6" +files = [ + {file = "milvus-2.2.11-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:64fa0fcbce1cb763d3aac0749cc17e04761e832297eae12ba5c97938f1acd243"}, + {file = "milvus-2.2.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c439d4231019e8cb78b13572dcd78a388cb63a5c271a2ab059bb54f019b1eb1c"}, + {file = "milvus-2.2.11-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d124cf7d6f914177ba14fb38c6a4ea305e3b6a8a09a86e7fc80f44270c0f6ede"}, + {file = "milvus-2.2.11-py3-none-win_amd64.whl", hash = "sha256:118569f56584670f8b1b7b4c89c0050b4678884b4719b8659edb1d47f12bd177"}, +] + +[package.extras] +client = ["pymilvus (>=2.2.0,<2.3.0)"] + [[package]] name = "monotonic" version = "1.6" @@ -2521,6 +2635,25 @@ files = [ [package.extras] plugins = ["importlib-metadata"] +[[package]] +name = "pymilvus" +version = "2.2.13" +description = "Python Sdk for Milvus" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pymilvus-2.2.13-py3-none-any.whl", hash = "sha256:ac991863bd63e860c1210d096695297175c6ed09f4de762cf42394cb5aecd1f6"}, + {file = "pymilvus-2.2.13.tar.gz", hash = "sha256:72da36cb5f4f84d7a8307202fcaa9a7fc4497d28d2d2235045ba93a430691ef1"}, +] + +[package.dependencies] +environs = "<=9.5.0" +grpcio = ">=1.49.1,<=1.56.0" +numpy = {version = "<1.25.0", markers = "python_version <= \"3.8\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +ujson = ">=2.0.0" + [[package]] name = "pyreadline3" version = "3.4.1" @@ -3573,6 +3706,76 @@ files = [ {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] +[[package]] +name = "ujson" +version = "5.8.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4511560d75b15ecb367eef561554959b9d49b6ec3b8d5634212f9fed74a6df1"}, + {file = "ujson-5.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9399eaa5d1931a0ead49dce3ffacbea63f3177978588b956036bfe53cdf6af75"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4e7bb7eba0e1963f8b768f9c458ecb193e5bf6977090182e2b4f4408f35ac76"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40931d7c08c4ce99adc4b409ddb1bbb01635a950e81239c2382cfe24251b127a"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d53039d39de65360e924b511c7ca1a67b0975c34c015dd468fca492b11caa8f7"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bdf04c6af3852161be9613e458a1fb67327910391de8ffedb8332e60800147a2"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a70f776bda2e5072a086c02792c7863ba5833d565189e09fabbd04c8b4c3abba"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f26629ac531d712f93192c233a74888bc8b8212558bd7d04c349125f10199fcf"}, + {file = "ujson-5.8.0-cp310-cp310-win32.whl", hash = "sha256:7ecc33b107ae88405aebdb8d82c13d6944be2331ebb04399134c03171509371a"}, + {file = "ujson-5.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:3b27a8da7a080add559a3b73ec9ebd52e82cc4419f7c6fb7266e62439a055ed0"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:193349a998cd821483a25f5df30b44e8f495423840ee11b3b28df092ddfd0f7f"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ddeabbc78b2aed531f167d1e70387b151900bc856d61e9325fcdfefb2a51ad8"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ce24909a9c25062e60653073dd6d5e6ec9d6ad7ed6e0069450d5b673c854405"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27a2a3c7620ebe43641e926a1062bc04e92dbe90d3501687957d71b4bdddaec4"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b852bdf920fe9f84e2a2c210cc45f1b64f763b4f7d01468b33f7791698e455e"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:20768961a6a706170497129960762ded9c89fb1c10db2989c56956b162e2a8a3"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e0147d41e9fb5cd174207c4a2895c5e24813204499fd0839951d4c8784a23bf5"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e3673053b036fd161ae7a5a33358ccae6793ee89fd499000204676baafd7b3aa"}, + {file = "ujson-5.8.0-cp311-cp311-win32.whl", hash = "sha256:a89cf3cd8bf33a37600431b7024a7ccf499db25f9f0b332947fbc79043aad879"}, + {file = "ujson-5.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3659deec9ab9eb19e8646932bfe6fe22730757c4addbe9d7d5544e879dc1b721"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:102bf31c56f59538cccdfec45649780ae00657e86247c07edac434cb14d5388c"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:299a312c3e85edee1178cb6453645217ba23b4e3186412677fa48e9a7f986de6"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e385a7679b9088d7bc43a64811a7713cc7c33d032d020f757c54e7d41931ae"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad24ec130855d4430a682c7a60ca0bc158f8253ec81feed4073801f6b6cb681b"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16fde596d5e45bdf0d7de615346a102510ac8c405098e5595625015b0d4b5296"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6d230d870d1ce03df915e694dcfa3f4e8714369cce2346686dbe0bc8e3f135e7"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9571de0c53db5cbc265945e08f093f093af2c5a11e14772c72d8e37fceeedd08"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7cba16b26efe774c096a5e822e4f27097b7c81ed6fb5264a2b3f5fd8784bab30"}, + {file = "ujson-5.8.0-cp312-cp312-win32.whl", hash = "sha256:48c7d373ff22366eecfa36a52b9b55b0ee5bd44c2b50e16084aa88b9de038916"}, + {file = "ujson-5.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:5ac97b1e182d81cf395ded620528c59f4177eee024b4b39a50cdd7b720fdeec6"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2a64cc32bb4a436e5813b83f5aab0889927e5ea1788bf99b930fad853c5625cb"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e54578fa8838ddc722539a752adfce9372474114f8c127bb316db5392d942f8b"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9721cd112b5e4687cb4ade12a7b8af8b048d4991227ae8066d9c4b3a6642a582"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9707e5aacf63fb919f6237d6490c4e0244c7f8d3dc2a0f84d7dec5db7cb54c"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0be81bae295f65a6896b0c9030b55a106fb2dec69ef877253a87bc7c9c5308f7"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae7f4725c344bf437e9b881019c558416fe84ad9c6b67426416c131ad577df67"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9ab282d67ef3097105552bf151438b551cc4bedb3f24d80fada830f2e132aeb9"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:94c7bd9880fa33fcf7f6d7f4cc032e2371adee3c5dba2922b918987141d1bf07"}, + {file = "ujson-5.8.0-cp38-cp38-win32.whl", hash = "sha256:bf5737dbcfe0fa0ac8fa599eceafae86b376492c8f1e4b84e3adf765f03fb564"}, + {file = "ujson-5.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:11da6bed916f9bfacf13f4fc6a9594abd62b2bb115acfb17a77b0f03bee4cfd5"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:69b3104a2603bab510497ceabc186ba40fef38ec731c0ccaa662e01ff94a985c"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9249fdefeb021e00b46025e77feed89cd91ffe9b3a49415239103fc1d5d9c29a"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2873d196725a8193f56dde527b322c4bc79ed97cd60f1d087826ac3290cf9207"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a4dafa9010c366589f55afb0fd67084acd8added1a51251008f9ff2c3e44042"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a42baa647a50fa8bed53d4e242be61023bd37b93577f27f90ffe521ac9dc7a3"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f3554eaadffe416c6f543af442066afa6549edbc34fe6a7719818c3e72ebfe95"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fb87decf38cc82bcdea1d7511e73629e651bdec3a43ab40985167ab8449b769c"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:407d60eb942c318482bbfb1e66be093308bb11617d41c613e33b4ce5be789adc"}, + {file = "ujson-5.8.0-cp39-cp39-win32.whl", hash = "sha256:0fe1b7edaf560ca6ab023f81cbeaf9946a240876a993b8c5a21a1c539171d903"}, + {file = "ujson-5.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f9b63530a5392eb687baff3989d0fb5f45194ae5b1ca8276282fb647f8dcdb3"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:efeddf950fb15a832376c0c01d8d7713479fbeceaed1eaecb2665aa62c305aec"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d8283ac5d03e65f488530c43d6610134309085b71db4f675e9cf5dff96a8282"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb0142f6f10f57598655340a3b2c70ed4646cbe674191da195eb0985a9813b83"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d459aca895eb17eb463b00441986b021b9312c6c8cc1d06880925c7f51009c"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d524a8c15cfc863705991d70bbec998456a42c405c291d0f84a74ad7f35c5109"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d6f84a7a175c75beecde53a624881ff618e9433045a69fcfb5e154b73cdaa377"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b748797131ac7b29826d1524db1cc366d2722ab7afacc2ce1287cdafccddbf1f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e72ba76313d48a1a3a42e7dc9d1db32ea93fac782ad8dde6f8b13e35c229130"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f504117a39cb98abba4153bf0b46b4954cc5d62f6351a14660201500ba31fe7f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8c91b6f4bf23f274af9002b128d133b735141e867109487d17e344d38b87d94"}, + {file = "ujson-5.8.0.tar.gz", hash = "sha256:78e318def4ade898a461b3d92a79f9441e7e0e4d2ad5419abed4336d702c7425"}, +] + [[package]] name = "urllib3" version = "2.0.3" @@ -4030,4 +4233,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "9ae4e4236289823b370e24a704d54c886e213a81e03a96bb6381db4e24442ebe" +content-hash = "27fc7d0d9c8ec38c0decc4b0cc4df22004541761134b4b52eef7426ded3ffaaa" diff --git a/python/pyproject.toml b/python/pyproject.toml index 6c6998c71bc3..52550279db65 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -30,6 +30,10 @@ torch = "2.0.0" [tool.poetry.group.chromadb.dependencies] chromadb = "^0.3.23" +[tool.poetry.group.milvus.dependencies] +pymilvus = "^2.2.11" +milvus = "^2.2.11" + [tool.poetry.group.weaviate.dependencies] weaviate-client = "^3.18.0" diff --git a/python/semantic_kernel/connectors/memory/milvus/__init__.py b/python/semantic_kernel/connectors/memory/milvus/__init__.py new file mode 100644 index 000000000000..afaeaa606786 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/milvus/__init__.py @@ -0,0 +1,5 @@ +from semantic_kernel.connectors.memory.milvus.milvus_memory_store import ( + MilvusMemoryStore, +) + +__all__ = ["MilvusMemoryStore"] diff --git a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py new file mode 100644 index 000000000000..60ff34fcf260 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py @@ -0,0 +1,469 @@ +# Copyright (c) Microsoft. All rights reserved. + +from logging import Logger +from typing import List, Optional, Tuple + +from numpy import array, expand_dims, ndarray +from pymilvus.milvus_client import milvus_client +from semantic_kernel.memory.memory_record import MemoryRecord +from semantic_kernel.memory.memory_store_base import MemoryStoreBase +from semantic_kernel.utils.null_logger import NullLogger + + +def memoryrecord_to_milvus_dict(mem: MemoryRecord) -> dict: + """Convert a memoryrecord into a dict. + Args: + mem (MemoryRecord): MemoryRecrod to convert. + + Returns: + dict: Dict result. + """ + ret_dict = {} + # Grab all the class vars + for key, val in vars(mem).items(): + if val is not None: + # Remove underscore + ret_dict[key[1:]] = val + return ret_dict + + +def milvus_dict_to_memoryrecord(milvus_dict: dict) -> MemoryRecord: + """Convert Milvus search result dict into MemoryRecord. + + Args: + milvus_dict (dict): Search hit + + Returns: + MemoryRecord + """ + # Embedding needs conversion to numpy array + embedding = milvus_dict.get("embedding", None) + if embedding is not None: + embedding = array(embedding) + return MemoryRecord( + is_reference=milvus_dict.get("is_reference", None), + external_source_name=milvus_dict.get("external_source_name", None), + id=milvus_dict.get("id", None), + description=milvus_dict.get("description", None), + text=milvus_dict.get("text", None), + additional_metadata=milvus_dict.get("additional_metadata", None), + embedding=embedding, + key=milvus_dict.get("key", None), + timestamp=milvus_dict.get("timestamp", None), + ) + + +# Default field values +ID_FIELD = "id" +ID_TYPE = "str" +EMBEDDING_FIELD = "embedding" + + +class MilvusMemoryStore(MemoryStoreBase): + def __init__( + self, + uri: str = "http://localhost:19530", + token: Optional[str] = None, + logger: Optional[Logger] = None, + ) -> None: + """MilvusMemoryStore allows for searching for records using Milvus/Zilliz Cloud. + + For more details on how to get the service started, take a look here: + Milvus: https://milvus.io/docs/get_started.md + Zilliz Cloud: https://docs.zilliz.com/docs/quick-start + + + Args: + uri (str, optional): The uri of the cluster. Defaults to + "http://localhost:19530". + token (Optional[str], optional): The token to connect to the cluster if + authentication is required. Defaults to None. + logger (Optional[Logger], optional): Logger to use. Defaults to None. + """ + self._uri = uri + self._token = (token,) + self._logger = logger or NullLogger() + self._client = milvus_client.MilvusClient( + uri=uri, + token=token, + ) + self._metric_cache = {} + + async def create_collection_async( + self, + collection_name: str, + dimension_num: Optional[int] = 1536, + distance_type: Optional[str] = "IP", + overwrite: bool = False, + consistency: str = "Session", + ) -> None: + """Create a Milvus collection. + + Args: + collection_name (str): The name of the collection. + dimension_num (Optional[int], optional): The size of the embeddings being + stored. Defaults to 1536. + distance_type (Optional[str], optional): Which distance function, at the + moment only "IP" and "L2" are supported. Defaults to "IP". + overwrite (bool, optional): Whether to overwrite any existing collection + with the same name. Defaults to False. + consistency (str, optional): Which consistency level to use: + Strong, Session, Bounded, Eventually. Defaults to "Session". + """ + if collection_name in self._client.list_collections(): + if overwrite: + self._client.drop_collection(collection_name=collection_name) + self._client.create_collection( + collection_name=collection_name, + dimension=dimension_num, + primary_field_name=ID_FIELD, + id_type=ID_TYPE, + auto_id=False, + vector_field_name=EMBEDDING_FIELD, + metric_type=distance_type, + max_length=65_535, + consistency_level=consistency, + ) + else: + self._client.create_collection( + collection_name=collection_name, + dimension=dimension_num, + primary_field_name=ID_FIELD, + id_type=ID_TYPE, + auto_id=False, + vector_field_name=EMBEDDING_FIELD, + metric_type=distance_type, + max_length=65_535, + consistency_level=consistency, + ) + + async def get_collections_async( + self, + ) -> List[str]: + """Return a list of present collections. + + Returns: + List[str]: List of collection names. + """ + return self._client.list_collections() + + async def delete_collection_async( + self, collection_name: str = "", all: bool = False + ) -> None: + """Delete the speficied collection. + + If all is True, all collections in the cluster will be removed. + + Args: + collection_name (str, optional): The name of the collection to delete. Defaults to "". + all (bool, optional): Whether to delete all collections. Defaults to False. + """ + cols = self._client.list_collections() + if all: + for x in cols: + self._client.drop_collection(x) + elif collection_name in cols: + self._client.drop_collection(collection_name) + + async def does_collection_exist_async(self, collection_name: str) -> bool: + """Return if the collection exists in the cluster. + + Args: + collection_name (str): The name of the collection. + + Returns: + bool: True if it exists, False otherwise. + """ + return True if collection_name in self._client.list_collections() else False + + async def upsert_async(self, collection_name: str, record: MemoryRecord) -> str: + """Upsert a single MemoryRecord into the collection. + + Args: + collection_name (str): The name of the collection. + record (MemoryRecord): The record to store. + + Returns: + str: The ID of the inserted record. + """ + # Use the batch insert with a total batch + res = await self.upsert_batch_async( + collection_name=collection_name, + records=[record], + batch_size=0, + ) + return res[0] + + async def upsert_batch_async( + self, collection_name: str, records: List[MemoryRecord], batch_size=100 + ) -> List[str]: + """_summary_ + + Args: + collection_name (str): The collection name. + records (List[MemoryRecord]): A list of memory records. + batch_size (int, optional): Batch size of the insert, 0 is a batch + size of total size. Defaults to 100. + + Raises: + Exception: Collection doesnt exist. + e: Failed to upsert a record. + + Returns: + List[str]: A list of inserted ID's. + """ + # Check if the collection exists. + if collection_name not in self._client.list_collections(): + self._logger.debug( + f"Collection {collection_name} does not exist, cannot insert." + ) + raise Exception( + f"Collection {collection_name} does not exist, cannot insert." + ) + # Convert the records to dicts + insert_list = [memoryrecord_to_milvus_dict(record) for record in records] + # The ids to remove + delete_ids = [insert[ID_FIELD] for insert in insert_list] + try: + # First delete then insert to have upsert + self._client.delete(collection_name=collection_name, pks=delete_ids) + return self._client.insert( + collection_name=collection_name, data=insert_list, batch_size=batch_size + ) + except Exception as e: + self._logger.debug(f"Upsert failed due to: {e}") + raise e + + async def get_async( + self, collection_name: str, key: str, with_embedding: bool + ) -> MemoryRecord: + """Get the MemoryRecord corresponding to the key. + + Args: + collection_name (str): The collection to get from. + key (str): The ID to grab. + with_embedding (bool): Whether to include the embedding in the results. + + Returns: + MemoryRecord: The MemoryRecord for the key. + """ + res = await self.get_batch_async( + collection_name=collection_name, keys=[key], with_embeddings=with_embedding + ) + return res[0] + + async def get_batch_async( + self, collection_name: str, keys: List[str], with_embeddings: bool + ) -> List[MemoryRecord]: + """Get the MemoryRecords corresponding to the keys + + Args: + collection_name (str): _description_ + keys (List[str]): _description_ + with_embeddings (bool): _description_ + + Raises: + Exception: _description_ + e: _description_ + + Returns: + List[MemoryRecord]: _description_ + """ + # Check if the collection exists + if collection_name not in self._client.list_collections(): + self._logger.debug( + f"Collection {collection_name} does not exist, cannot get." + ) + raise Exception("Collection {collection_name} does not exist, cannot get.") + try: + gets = self._client.get( + collection_name=collection_name, + ids=keys, + output_fields=["*"] if not with_embeddings else ["*", EMBEDDING_FIELD], + ) + return [milvus_dict_to_memoryrecord(get) for get in gets] + except Exception as e: + self._logger.debug(f"Get failed due to: {e}") + raise e + + async def remove_async(self, collection_name: str, key: str) -> None: + """Remove the specified record based on key. + + Args: + collection_name (str): Collection to remove from. + key (str): The key to remove. + """ + await self.remove_batch_async(collection_name=collection_name, keys=[key]) + + async def remove_batch_async(self, collection_name: str, keys: List[str]) -> None: + """Remove multiple records based on keys. + + Args: + collection_name (str): Collection to remove from + keys (List[str]): The list of keys. + + Raises: + Exception: Collection doesnt exist. + e: Failure to remove key. + """ + if collection_name not in self._client.list_collections(): + self._logger.debug( + f"Collection {collection_name} does not exist, cannot remove." + ) + raise Exception( + f"Collection {collection_name} does not exist, cannot remove." + ) + try: + self._client.delete( + collection_name=collection_name, + pks=keys, + ) + except Exception as e: + self._logger.debug(f"Remove failed due to: {e}") + raise e + + def _search(self, collection_name, data, limit, distance_metric): + """Helper function to search with correct distance metric due to bug""" + # TODO Remove after https://github.com/milvus-io/milvus/issues/25504 + # Simple way to select opposite + distance_pairs = { + "l2": "IP", + "ip": "L2", + } + try: + # Try with passed in metric + results = self._client.search( + collection_name=collection_name, + data=data, + limit=limit, + search_params={"metric_type": distance_metric}, + output_fields=["*"], + )[0] + return results, distance_metric + except Exception as e: + self._logger.debug(f"Search failed with IP, testing L2: {e}") + try: + distance_metric = distance_pairs[distance_metric.lower()] + results = self._client.search( + collection_name=collection_name, + data=data, + limit=limit, + search_params={"metric_type": distance_metric}, + output_fields=["*"], + )[0] + return results, distance_metric + except Exception as e: + self._logger.debug(f"Search failed with L2: {e}") + raise e + + async def get_nearest_matches_async( + self, + collection_name: str, + embedding: ndarray, + limit: int, + min_relevance_score: float = None, + with_embeddings: bool = False, + ) -> List[Tuple[MemoryRecord, float]]: + """Find the nearest `limit` matches for an embedding. + + Args: + collection_name (str): The collection to search. + embedding (ndarray): The embedding to search. + limit (int): The total results to display. + min_relevance_score (float, optional): Minimum distance to include. Defaults to None. + with_embeddings (bool, optional): Whether to include embeddings in result. Defaults to False. + + Raises: + Exception: Missing collection + e: Failure to search + + Returns: + List[Tuple[MemoryRecord, float]]: MemoryRecord and distance tuple. + """ + # Check if collection exists + if collection_name not in self._client.list_collections(): + self._logger.debug( + f"Collection {collection_name} does not exist, cannot search." + ) + raise Exception( + f"Collection {collection_name} does not exist, cannot search." + ) + # Search requests takes a list of requests. + if len(embedding.shape) == 1: + embedding = expand_dims(embedding, axis=0) + + # Search based on the cached metric + results, search_type = self._search( + collection_name=collection_name, + data=embedding, + limit=limit, + distance_metric=self._metric_cache.get(collection_name, None) or "IP", + ) + + # Update cached metric + self._metric_cache[collection_name] = search_type + + cleaned_results = [] + + if with_embeddings: + ids = [] + + # Clean up results, filter and get ids for fetch + for x in results: + if min_relevance_score is not None and x["distance"] < min_relevance_score: + continue + cleaned_results.append(x) + if with_embeddings: + ids.append(x[ID_FIELD]) + + if with_embeddings: + try: + vectors = self._client.get( + collection_name=collection_name, + ids=ids, + output_fields=[EMBEDDING_FIELD], + ) + except Exception as e: + self._logger.debug(f"Get embeddings in search failed due to: {e}.") + raise e + + vectors = {res[ID_FIELD]: res[EMBEDDING_FIELD] for res in vectors} + for res in results: + res["entity"][EMBEDDING_FIELD] = vectors[res[ID_FIELD]] + + results = [ + (milvus_dict_to_memoryrecord(result["entity"]), result["distance"]) + for result in results + ] + + return results + + async def get_nearest_match_async( + self, + collection_name: str, + embedding: ndarray, + min_relevance_score: float = None, + with_embedding: bool = False, + ) -> Tuple[MemoryRecord, float]: + """Find the nearest match for an embedding. + + Args: + collection_name (str): The collection to search. + embedding (ndarray): The embedding to search for. + min_relevance_score (float, optional): T. Defaults to 0.0. + with_embedding (bool, optional): Whether to include embedding in result. Defaults to False. + + Returns: + Tuple[MemoryRecord, float]: A tuple of record and distance. + """ + m = await self.get_nearest_matches_async( + collection_name, + embedding, + 1, + min_relevance_score, + with_embedding, + ) + if len(m) > 0: + return m[0] + else: + return None diff --git a/python/tests/integration/connectors/memory/test_milvus.py b/python/tests/integration/connectors/memory/test_milvus.py new file mode 100644 index 000000000000..910b136ca478 --- /dev/null +++ b/python/tests/integration/connectors/memory/test_milvus.py @@ -0,0 +1,216 @@ +# Copyright (c) Microsoft. All rights reserved. + +import numpy as np +import pytest + +from semantic_kernel.connectors.memory.milvus import MilvusMemoryStore +from semantic_kernel.memory.memory_record import MemoryRecord + +try: + from milvus import default_server # noqa: F401 + + default_server.start() + URI = "http://127.0.0.1:" + str(default_server.listen_port) + TOKEN = None + milvus_installed = True + + +except Exception: + milvus_installed = False + +pytestmark = pytest.mark.skipif( + not milvus_installed, reason="local milvus is not installed" +) + + +@pytest.fixture +def memory_record1(): + return MemoryRecord( + id="test_id1", + text="sample text1", + is_reference=False, + embedding=np.array([0.5, 0.5]), + description="description", + external_source_name="external source", + additional_metadata="additional metadata", + timestamp="timestamp", + ) + + +@pytest.fixture +def memory_record2(): + return MemoryRecord( + id="test_id2", + text="sample text2", + is_reference=False, + embedding=np.array([0.25, 0.75]), + description="description", + external_source_name="external source", + additional_metadata="additional metadata", + timestamp="timestamp", + ) + + +@pytest.mark.asyncio +async def test_create_and_get_collection_async(): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + result = await memory.get_collections_async() + assert result == ["test_collection"] + + +@pytest.mark.asyncio +async def test_get_collections_async(): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection1", 2) + await memory.create_collection_async("test_collection2", 2) + await memory.create_collection_async("test_collection3", 2) + result = await memory.get_collections_async() + assert len(result) == 3 + + +@pytest.mark.asyncio +async def test_delete_collection_async(): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + await memory.delete_collection_async("test_collection", 2) + result = await memory.get_collections_async() + assert len(result) == 0 + + await memory.create_collection_async("test_collection", 2) + await memory.delete_collection_async("TEST_COLLECTION", 2) + result = await memory.get_collections_async() + assert len(result) == 0 + + +@pytest.mark.asyncio +async def test_does_collection_exist_async(): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + result = await memory.does_collection_exist_async("test_collection") + assert result is True + + result = await memory.does_collection_exist_async("TEST_COLLECTION") + assert result is False + + +@pytest.mark.asyncio +async def test_upsert_and_get_async(memory_record1): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + + await memory.create_collection_async("test_collection", 2) + await memory.upsert_async("test_collection", memory_record1) + + result = await memory.get_async("test_collection", "test_id1", True) + assert result._id == "test_id1" + assert result._text == "sample text1" + assert result._is_reference is False + assert np.array_equal(result.embedding, np.array([0.5, 0.5])) + assert result._description == "description" + assert result._external_source_name == "external source" + assert result._additional_metadata == "additional metadata" + assert result._timestamp == "timestamp" + + +@pytest.mark.asyncio +async def test_upsert_and_get_async_with_no_embedding(memory_record1): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + + await memory.upsert_async("test_collection", memory_record1) + + result = await memory.get_async("test_collection", "test_id1", False) + assert result._id == "test_id1" + assert result._text == "sample text1" + assert result._is_reference is False + assert result.embedding is None + assert result._description == "description" + assert result._external_source_name == "external source" + assert result._additional_metadata == "additional metadata" + assert result._timestamp == "timestamp" + + +@pytest.mark.asyncio +async def test_upsert_and_get_batch_async(memory_record1, memory_record2): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + + await memory.upsert_batch_async("test_collection", [memory_record1, memory_record2]) + + result = await memory.get_batch_async( + "test_collection", ["test_id1", "test_id2"], True + ) + assert len(result) == 2 + assert result[0]._id == "test_id1" + assert result[0]._text == "sample text1" + assert result[0]._is_reference is False + assert np.array_equal(result[0].embedding, np.array([0.5, 0.5])) + assert result[0]._description == "description" + assert result[0]._external_source_name == "external source" + assert result[0]._additional_metadata == "additional metadata" + assert result[0]._timestamp == "timestamp" + + +@pytest.mark.asyncio +async def test_remove_async(memory_record1): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + + await memory.upsert_async("test_collection", memory_record1) + await memory.remove_async("test_collection", "test_id1") + + # memory.get_async should raise Exception if record is not found + with pytest.raises(Exception): + await memory.get_async("test_collection", "test_id1", True) + + +@pytest.mark.asyncio +async def test_remove_batch_async(memory_record1, memory_record2): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + + await memory.upsert_batch_async("test_collection", [memory_record1, memory_record2]) + await memory.remove_batch_async("test_collection", ["test_id1", "test_id2"]) + + result = await memory.get_batch_async( + "test_collection", ["test_id1", "test_id2"], True + ) + assert result == [] + + +@pytest.mark.asyncio +async def test_get_nearest_matches_async(memory_record1, memory_record2): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + await memory.upsert_batch_async("test_collection", [memory_record1, memory_record2]) + results = await memory.get_nearest_matches_async( + "test_collection", np.array([0.5, 0.5]), limit=2 + ) + assert len(results) == 2 + assert isinstance(results[0][0], MemoryRecord) + assert results[0][1] == pytest.approx(0.5, abs=1e-5) + + +@pytest.mark.asyncio +async def test_get_nearest_match_async(memory_record1, memory_record2): + memory = MilvusMemoryStore(uri=URI, token=TOKEN) + await memory.delete_collection_async(all=True) + await memory.create_collection_async("test_collection", 2) + await memory.upsert_batch_async("test_collection", [memory_record1, memory_record2]) + + result = await memory.get_nearest_match_async( + "test_collection", np.array([0.5, 0.5]) + ) + assert len(result) == 2 + assert isinstance(result[0], MemoryRecord) + assert result[1] == pytest.approx(0.5, abs=1e-5) From d2bde85ed740d8cfc3aeb3914195dc2423094fff Mon Sep 17 00:00:00 2001 From: Abby Harrison <54643756+awharrison-28@users.noreply.github.com> Date: Fri, 14 Jul 2023 12:27:22 -0700 Subject: [PATCH 2/7] Update python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py --- .../connectors/memory/milvus/milvus_memory_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py index 60ff34fcf260..42e66563ed2d 100644 --- a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py +++ b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py @@ -13,7 +13,7 @@ def memoryrecord_to_milvus_dict(mem: MemoryRecord) -> dict: """Convert a memoryrecord into a dict. Args: - mem (MemoryRecord): MemoryRecrod to convert. + mem (MemoryRecord): MemoryRecord to convert. Returns: dict: Dict result. From 382474132e119a119136eaa7e467e9db0535cd43 Mon Sep 17 00:00:00 2001 From: Abby Harrison Date: Fri, 14 Jul 2023 12:27:51 -0700 Subject: [PATCH 3/7] pre-commit checks --- .../connectors/memory/milvus/milvus_memory_store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py index 60ff34fcf260..44986004e7a5 100644 --- a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py +++ b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py @@ -5,6 +5,7 @@ from numpy import array, expand_dims, ndarray from pymilvus.milvus_client import milvus_client + from semantic_kernel.memory.memory_record import MemoryRecord from semantic_kernel.memory.memory_store_base import MemoryStoreBase from semantic_kernel.utils.null_logger import NullLogger From e7c65b4a29c4f269750d15354706aca39197316d Mon Sep 17 00:00:00 2001 From: Filip Haltmayer Date: Fri, 14 Jul 2023 16:11:52 -0700 Subject: [PATCH 4/7] Updating Fixture for Milvus Signed-off-by: Filip Haltmayer --- .../connectors/memory/test_milvus.py | 52 ++++++++++++------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/python/tests/integration/connectors/memory/test_milvus.py b/python/tests/integration/connectors/memory/test_milvus.py index 910b136ca478..bf0be8c3d4d5 100644 --- a/python/tests/integration/connectors/memory/test_milvus.py +++ b/python/tests/integration/connectors/memory/test_milvus.py @@ -8,14 +8,9 @@ try: from milvus import default_server # noqa: F401 - - default_server.start() - URI = "http://127.0.0.1:" + str(default_server.listen_port) - TOKEN = None + import pymilvus milvus_installed = True - - -except Exception: +except ImportError: milvus_installed = False pytestmark = pytest.mark.skipif( @@ -23,6 +18,16 @@ ) +@pytest.fixture(scope="module") +def setup_milvus(): + default_server.start() + host = "http://127.0.0.1:" + str(default_server.listen_port) + port = None + yield host, port + default_server.stop() + default_server.cleanup() + + @pytest.fixture def memory_record1(): return MemoryRecord( @@ -52,7 +57,8 @@ def memory_record2(): @pytest.mark.asyncio -async def test_create_and_get_collection_async(): +async def test_create_and_get_collection_async(setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -61,7 +67,8 @@ async def test_create_and_get_collection_async(): @pytest.mark.asyncio -async def test_get_collections_async(): +async def test_get_collections_async(setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection1", 2) @@ -72,7 +79,8 @@ async def test_get_collections_async(): @pytest.mark.asyncio -async def test_delete_collection_async(): +async def test_delete_collection_async(setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -87,7 +95,8 @@ async def test_delete_collection_async(): @pytest.mark.asyncio -async def test_does_collection_exist_async(): +async def test_does_collection_exist_async(setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -99,7 +108,8 @@ async def test_does_collection_exist_async(): @pytest.mark.asyncio -async def test_upsert_and_get_async(memory_record1): +async def test_upsert_and_get_async(memory_record1, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) @@ -118,7 +128,8 @@ async def test_upsert_and_get_async(memory_record1): @pytest.mark.asyncio -async def test_upsert_and_get_async_with_no_embedding(memory_record1): +async def test_upsert_and_get_async_with_no_embedding(memory_record1, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -137,7 +148,8 @@ async def test_upsert_and_get_async_with_no_embedding(memory_record1): @pytest.mark.asyncio -async def test_upsert_and_get_batch_async(memory_record1, memory_record2): +async def test_upsert_and_get_batch_async(memory_record1, memory_record2, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -159,7 +171,8 @@ async def test_upsert_and_get_batch_async(memory_record1, memory_record2): @pytest.mark.asyncio -async def test_remove_async(memory_record1): +async def test_remove_async(memory_record1, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -173,7 +186,8 @@ async def test_remove_async(memory_record1): @pytest.mark.asyncio -async def test_remove_batch_async(memory_record1, memory_record2): +async def test_remove_batch_async(memory_record1, memory_record2, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -188,7 +202,8 @@ async def test_remove_batch_async(memory_record1, memory_record2): @pytest.mark.asyncio -async def test_get_nearest_matches_async(memory_record1, memory_record2): +async def test_get_nearest_matches_async(memory_record1, memory_record2, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) @@ -202,7 +217,8 @@ async def test_get_nearest_matches_async(memory_record1, memory_record2): @pytest.mark.asyncio -async def test_get_nearest_match_async(memory_record1, memory_record2): +async def test_get_nearest_match_async(memory_record1, memory_record2, setup_milvus): + URI, TOKEN = setup_milvus memory = MilvusMemoryStore(uri=URI, token=TOKEN) await memory.delete_collection_async(all=True) await memory.create_collection_async("test_collection", 2) From 0c5f28856c39cfb55bda551444b364c9ed64ed1e Mon Sep 17 00:00:00 2001 From: Abby Harrison Date: Mon, 17 Jul 2023 10:40:14 -0700 Subject: [PATCH 5/7] pre-commit checks --- python/tests/integration/connectors/memory/test_milvus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/integration/connectors/memory/test_milvus.py b/python/tests/integration/connectors/memory/test_milvus.py index bf0be8c3d4d5..d21c3e9352ed 100644 --- a/python/tests/integration/connectors/memory/test_milvus.py +++ b/python/tests/integration/connectors/memory/test_milvus.py @@ -8,7 +8,7 @@ try: from milvus import default_server # noqa: F401 - import pymilvus + milvus_installed = True except ImportError: milvus_installed = False @@ -26,7 +26,7 @@ def setup_milvus(): yield host, port default_server.stop() default_server.cleanup() - + @pytest.fixture def memory_record1(): From 3bb4af47537fa191bd1a827a99c6e3494b57fdaa Mon Sep 17 00:00:00 2001 From: Abby Harrison <54643756+awharrison-28@users.noreply.github.com> Date: Tue, 18 Jul 2023 08:43:17 -0700 Subject: [PATCH 6/7] Update python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py --- .../connectors/memory/milvus/milvus_memory_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py index c1e01b598586..709fd14cedac 100644 --- a/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py +++ b/python/semantic_kernel/connectors/memory/milvus/milvus_memory_store.py @@ -151,7 +151,7 @@ async def get_collections_async( async def delete_collection_async( self, collection_name: str = "", all: bool = False ) -> None: - """Delete the speficied collection. + """Delete the specified collection. If all is True, all collections in the cluster will be removed. From 6916b66112888529c642b473a17f134cff673adb Mon Sep 17 00:00:00 2001 From: Filip Haltmayer Date: Tue, 18 Jul 2023 14:10:24 -0700 Subject: [PATCH 7/7] preclean test milvus Signed-off-by: Filip Haltmayer --- python/tests/integration/connectors/memory/test_milvus.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tests/integration/connectors/memory/test_milvus.py b/python/tests/integration/connectors/memory/test_milvus.py index d21c3e9352ed..ebdb1b7d5bff 100644 --- a/python/tests/integration/connectors/memory/test_milvus.py +++ b/python/tests/integration/connectors/memory/test_milvus.py @@ -20,6 +20,7 @@ @pytest.fixture(scope="module") def setup_milvus(): + default_server.cleanup() default_server.start() host = "http://127.0.0.1:" + str(default_server.listen_port) port = None