From 29f5221584f6a098d95aae5d3779df6af26c01ff Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 10 Jan 2024 04:17:45 -0500 Subject: [PATCH] WIP --- .github/actions/test-data-plane/action.yaml | 4 +- pinecone/data/vector_factory.py | 5 +- pinecone/utils/convert_to_list.py | 2 +- tests/integration/control/__init__.py | 0 tests/integration/data/__init__.py | 0 tests/integration/data/conftest.py | 32 +-- tests/integration/data/cosine/__init__.py | 0 tests/integration/data/cosine/test_upsert.py | 101 ++++++---- .../data/cosine/test_upsert_errors.py | 183 ++++++++++++++++++ tests/integration/data/cosine/utils.py | 4 + tests/integration/helpers/__init__.py | 1 + tests/integration/helpers/helpers.py | 16 ++ tests/unit/utils/test_convert_to_list.py | 1 + 13 files changed, 298 insertions(+), 51 deletions(-) create mode 100644 tests/integration/control/__init__.py create mode 100644 tests/integration/data/__init__.py create mode 100644 tests/integration/data/cosine/__init__.py create mode 100644 tests/integration/data/cosine/test_upsert_errors.py create mode 100644 tests/integration/data/cosine/utils.py diff --git a/.github/actions/test-data-plane/action.yaml b/.github/actions/test-data-plane/action.yaml index 5e2c8134..1e7d184c 100644 --- a/.github/actions/test-data-plane/action.yaml +++ b/.github/actions/test-data-plane/action.yaml @@ -21,7 +21,7 @@ inputs: description: 'Whether to use gRPC or REST' required: false default: 'true' - freshness_sleep_seconds: + freshness_timeout_seconds: description: 'The number of seconds to wait for the index to become fresh' required: false default: '60' @@ -54,4 +54,4 @@ runs: USE_GRPC: ${{ inputs.use_grpc }} METRIC: ${{ inputs.metric }} SPEC: ${{ inputs.spec }} - FRESHNESS_SLEEP_SECONDS: ${{ inputs.freshness_sleep_seconds }} \ No newline at end of file + FRESHNESS_TIMEOUT_SECONDS: ${{ inputs.freshness_timeout_seconds }} \ No newline at end of file diff --git a/pinecone/data/vector_factory.py b/pinecone/data/vector_factory.py index 0b38a8ca..869d8ee9 100644 --- a/pinecone/data/vector_factory.py +++ b/pinecone/data/vector_factory.py @@ -64,7 +64,10 @@ def _tuple_to_vector(item, check_type: bool) -> Vector: if len(item) < 2 or len(item) > 3: raise VectorTupleLengthError(item) id, values, metadata = fix_tuple_length(item, 3) - return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type) + if isinstance(values, SparseValues): + raise ValueError("Sparse values are not supported in tuples. Please use either dicts or a Vector objects as inputs.") + else: + return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type) @staticmethod def _dict_to_vector(item, check_type: bool) -> Vector: diff --git a/pinecone/utils/convert_to_list.py b/pinecone/utils/convert_to_list.py index ecaf4590..ce28a9f5 100644 --- a/pinecone/utils/convert_to_list.py +++ b/pinecone/utils/convert_to_list.py @@ -3,7 +3,7 @@ def convert_to_list(obj): if class_name == 'list': return obj - elif hasattr(obj, 'tolist'): + elif hasattr(obj, 'tolist') and callable(getattr(obj, 'tolist')): return obj.tolist() else: return list(obj) \ No newline at end of file diff --git a/tests/integration/control/__init__.py b/tests/integration/control/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/data/__init__.py b/tests/integration/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/data/conftest.py b/tests/integration/data/conftest.py index 4bdce120..58cd3484 100644 --- a/tests/integration/data/conftest.py +++ b/tests/integration/data/conftest.py @@ -11,11 +11,11 @@ # - environment: free vs paid # - with metadata vs without metadata -@pytest.fixture +@pytest.fixture(scope='session') def api_key(): return get_environment_var('PINECONE_API_KEY') -@pytest.fixture +@pytest.fixture(scope='session') def client(api_key): use_grpc = os.environ.get('USE_GRPC', 'false') == 'true' if use_grpc: @@ -25,27 +25,35 @@ def client(api_key): from pinecone import Pinecone return Pinecone(api_key=api_key) -@pytest.fixture +@pytest.fixture(scope='session') def metric(): return get_environment_var('METRIC', 'cosine') -@pytest.fixture +@pytest.fixture(scope='session') def spec(): return json.loads(get_environment_var('SPEC')) -@pytest.fixture +@pytest.fixture(scope='session') def index_name(): return 'dataplane-' + random_string(20) +@pytest.fixture(scope='session') +def index_host(client, index_name, metric, spec): + client.create_index( + name=index_name, + dimension=2, + metric=metric, + spec=spec + ) + description = client.describe_index(name=index_name) + return description.host + +# Namespaces not scoped to session; each test can have its own namespace +# to avoid collisions @pytest.fixture def namespace(): return random_string(10) @pytest.fixture -def index_host(client, index_name, metric, spec): - client.create_index(name=index_name, dimension=2, metric=metric, spec=spec) - description = client.describe_index(name=index_name) - return description.host - -def sleep_t(): - return int(os.environ.get('FRESHNESS_SLEEP_SECONDS', 60)) \ No newline at end of file +def idx(client, index_name, index_host): + return client.Index(name=index_name, host=index_host) \ No newline at end of file diff --git a/tests/integration/data/cosine/__init__.py b/tests/integration/data/cosine/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/data/cosine/test_upsert.py b/tests/integration/data/cosine/test_upsert.py index 3e42b983..8c6e0d5c 100644 --- a/tests/integration/data/cosine/test_upsert.py +++ b/tests/integration/data/cosine/test_upsert.py @@ -1,54 +1,85 @@ import pytest -import time -from pinecone import Vector +import os +import random +from pinecone import Vector, SparseValues +from ...helpers import poll_stats_for_namespace -def test_upsert_to_default_namespace(client, index_name, sleep_t): - expected_dimension = 2 - desc = client.describe_index(index_name) - assert desc.dimension == expected_dimension - assert desc.metric == 'cosine' - - idx = client.Index(index_name) +@pytest.mark.parametrize('use_nondefault_namespace', [True, False]) +def test_upsert_to_namespace( + idx, + namespace, + use_nondefault_namespace +): + target_namespace = namespace if use_nondefault_namespace else '' # Upsert with tuples idx.upsert(vectors=[ - ('1', [1.0, 2.0]), - ('2', [3.0, 4.0]), - ('3', [5.0, 6.0]) - ]) + ('1', embedding_values()), + ('2', embedding_values()), + ('3', embedding_values()) + ], + namespace=target_namespace + ) # Upsert with objects idx.upsert(vectors=[ - Vector('4', [7.0, 8.0]), - Vector('5', [9.0, 10.0]), - Vector('6', [11.0, 12.0]) - ]) + Vector(id='4', values=embedding_values()), + Vector(id='5', values=embedding_values()), + Vector(id='6', values=embedding_values()) + ], + namespace=target_namespace + ) # Upsert with dict idx.upsert(vectors=[ - {'id': '7', 'values': [13.0, 14.0]}, - {'id': '8', 'values': [15.0, 16.0]}, - {'id': '9', 'values': [17.0, 18.0]} - ]) + {'id': '7', 'values': embedding_values()}, + {'id': '8', 'values': embedding_values()}, + {'id': '9', 'values': embedding_values()} + ], + namespace=target_namespace + ) - time.sleep(sleep_t) + poll_stats_for_namespace(idx, target_namespace) # Check the vector count reflects some data has been upserted stats = idx.describe_index_stats() - assert stats.vector_count == 9 - + assert stats.total_vector_count >= 9 + assert stats.namespaces[target_namespace].vector_count == 9 -def test_upsert_to_custom_namespace(client, index_name, namespace): - expected_dimension = 2 - assert client.describe_index(index_name).dimension == expected_dimension +@pytest.mark.parametrize('use_nondefault_namespace', [True, False]) +@pytest.mark.skipif(os.getenv('METRIC') != 'dotproduct', reason='Only metric=dotprodouct indexes support hybrid') +def test_upsert_to_namespace_with_sparse_embedding_values( + idx, + namespace, + use_nondefault_namespace +): + target_namespace = namespace if use_nondefault_namespace else '' - idx = client.Index(index_name) - - # Upsert with tuples + # Upsert with sparse values object idx.upsert(vectors=[ - ('1', [1.0, 2.0]), - ('2', [3.0, 4.0]), - ('3', [5.0, 6.0]) - ], - namespace=namespace + Vector( + id='1', + sparse_values=SparseValues( + indices=[0,1], + values=embedding_values() + ) + ), + ], + namespace=target_namespace ) + + # Upsert with sparse values dict + idx.upsert(vectors=[ + {'id': '2', 'sparse_values': {'indices': [0,1], 'values': embedding_values()}}, + {'id': '3', 'sparse_values': {'indices': [0,1], 'values': embedding_values()}} + ], + namespace=target_namespace + ) + + poll_stats_for_namespace(idx, target_namespace) + + # Check the vector count reflects some data has been upserted + stats = idx.describe_index_stats() + assert stats.total_vector_count >= 9 + assert stats.namespaces[target_namespace].vector_count == 9 + diff --git a/tests/integration/data/cosine/test_upsert_errors.py b/tests/integration/data/cosine/test_upsert_errors.py new file mode 100644 index 00000000..69023280 --- /dev/null +++ b/tests/integration/data/cosine/test_upsert_errors.py @@ -0,0 +1,183 @@ +import pytest +import os +from pinecone import Vector, SparseValues +from ...helpers import poll_stats_for_namespace +from .utils import embedding_values +from pinecone import PineconeApiTypeError, PineconeApiException + +class TestUpsertFailsWhenDimensionMismatch(): + def test_upsert_fails_when_dimension_mismatch_objects(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + Vector(id='1', values=embedding_values(2)), + Vector(id='2', values=embedding_values(3)) + ]) + + def test_upsert_fails_when_dimension_mismatch_tuples(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + ('1', embedding_values(2)), + ('2', embedding_values(3)) + ]) + + def test_upsert_fails_when_dimension_mismatch_dicts(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + {'id': '1', 'values': embedding_values(2)}, + {'id': '2', 'values': embedding_values(3)} + ]) + +@pytest.mark.skipif(os.getenv('METRIC') != 'dotproduct', reason='Only metric=dotprodouct indexes support hybrid') +class TestUpsertFailsSparseValuesDimensionMismatch(): + def test_upsert_fails_when_sparse_values_indices_out_of_range_objects(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + Vector(id='1', values=[0.1, 0.1], sparse_values=SparseValues(indices=[0], values=[0.5])), + Vector(id='2', values=[0.1, 0.1], sparse_values=SparseValues(indices=[0, 1, 2], values=[0.5, 0.5, 0.5])) + ]) + + def test_upsert_fails_when_sparse_values_indices_values_mismatch_objects(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + Vector(id='1', values=[0.1, 0.1], sparse_values=SparseValues(indices=[0], values=[0.5, 0.5])) + ]) + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + Vector(id='1', values=[0.1, 0.1], sparse_values=SparseValues(indices=[0, 1], values=[0.5])) + ]) + + def test_upsert_fails_when_sparse_values_indices_out_of_range_tuples(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + ('1', SparseValues(indices=[0], values=[0.5])), + ('2', SparseValues(indices=[0, 1, 2], values=[0.5, 0.5, 0.5])) + ]) + + def test_upsert_fails_when_sparse_values_indices_values_mismatch_tuples(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + ('1', SparseValues(indices=[0], values=[0.5, 0.5])) + ]) + with pytest.raises(ValueError): + idx.upsert(vectors=[ + ('1', SparseValues(indices=[0, 1], values=[0.5])) + ]) + + def test_upsert_fails_when_sparse_values_indices_out_of_range_dicts(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + {'id': '1', 'values': [], 'sparse_values': SparseValues(indices=[0], values=[0.5])}, + {'id': '2', 'values': [], 'sparse_values': SparseValues(indices=[0, 1, 2], values=[0.5, 0.5, 0.5])} + ]) + + def test_upsert_fails_when_sparse_values_indices_values_mismatch_dicts(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + {'id': '1', 'values': [], 'sparse_values': SparseValues(indices=[0], values=[0.5, 0.5])} + ]) + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[ + {'id': '1', 'values': [], 'sparse_values': SparseValues(indices=[0, 1], values=[0.5])} + ]) + +class TestUpsertFailsWhenValuesMissing(): + def test_upsert_fails_when_values_missing_objects(self, idx): + with pytest.raises(TypeError): + idx.upsert(vectors=[ + Vector(id='1'), + Vector(id='2') + ]) + + def test_upsert_fails_when_values_missing_tuples(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + ('1',), + ('2',) + ]) + + def test_upsert_fails_when_values_missing_dicts(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + {'id': '1'}, + {'id': '2'} + ]) + +class TestUpsertFailsWhenValuesWrongType(): + def test_upsert_fails_when_values_wrong_type_objects(self, idx): + with pytest.raises(PineconeApiTypeError): + idx.upsert(vectors=[ + Vector(id='1', values='abc'), + Vector(id='2', values='def') + ]) + + def test_upsert_fails_when_values_wrong_type_tuples(self, idx): + with pytest.raises(PineconeApiTypeError): + idx.upsert(vectors=[ + ('1', 'abc'), + ('2', 'def') + ]) + + def test_upsert_fails_when_values_wrong_type_dicts(self, idx): + with pytest.raises(TypeError): + idx.upsert(vectors=[ + {'id': '1', 'values': 'abc'}, + {'id': '2', 'values': 'def'} + ]) + +class TestUpsertFailsWhenVectorsMissing(): + def test_upsert_fails_when_vectors_empty(self, idx): + with pytest.raises(PineconeApiException): + idx.upsert(vectors=[]) + + def test_upsert_fails_when_vectors_wrong_type(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors='abc') + + def test_upsert_fails_when_vectors_missing(self, idx): + with pytest.raises(TypeError): + idx.upsert() + +class TestUpsertIdMissing(): + def test_upsert_fails_when_id_is_missing_objects(self, idx): + with pytest.raises(TypeError): + idx.upsert(vectors=[ + Vector(id='1', values=embedding_values()), + Vector(values=embedding_values()) + ]) + + def test_upsert_fails_when_id_is_missing_tuples(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + ('1', embedding_values()), + (embedding_values()) + ]) + + def test_upsert_fails_when_id_is_missing_dicts(self, idx): + with pytest.raises(ValueError): + idx.upsert(vectors=[ + {'id': '1', 'values': embedding_values()}, + {'values': embedding_values()} + ]) + + +class TestUpsertIdWrongType(): + def test_upsert_fails_when_id_wrong_type_objects(self, idx): + with pytest.raises(PineconeApiTypeError): + idx.upsert(vectors=[ + Vector(id='1', values=embedding_values()), + Vector(id=2, values=embedding_values()) + ]) + + def test_upsert_fails_when_id_wrong_type_tuples(self, idx): + with pytest.raises(PineconeApiTypeError): + idx.upsert(vectors=[ + ('1', embedding_values()), + (2, embedding_values()) + ]) + + def test_upsert_fails_when_id_wrong_type_dicts(self, idx): + with pytest.raises(PineconeApiTypeError): + idx.upsert(vectors=[ + {'id': '1', 'values': embedding_values()}, + {'id': 2, 'values': embedding_values()} + ]) \ No newline at end of file diff --git a/tests/integration/data/cosine/utils.py b/tests/integration/data/cosine/utils.py new file mode 100644 index 00000000..2045cf0a --- /dev/null +++ b/tests/integration/data/cosine/utils.py @@ -0,0 +1,4 @@ +import random + +def embedding_values(dimension=2): + return [random.random() for _ in range(dimension)] diff --git a/tests/integration/helpers/__init__.py b/tests/integration/helpers/__init__.py index e69de29b..fdd06851 100644 --- a/tests/integration/helpers/__init__.py +++ b/tests/integration/helpers/__init__.py @@ -0,0 +1 @@ +from .helpers import get_environment_var, random_string, poll_stats_for_namespace \ No newline at end of file diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py index 1d19e823..0abd92d5 100644 --- a/tests/integration/helpers/helpers.py +++ b/tests/integration/helpers/helpers.py @@ -1,5 +1,6 @@ import re import os +import time import random import string from typing import Any @@ -44,3 +45,18 @@ def get_environment_var(name: str, defaultVal: Any = None) -> str: raise Exception('Expected environment variable ' + name + ' is not set') else: return val + +def poll_stats_for_namespace(idx, namespace, max_sleep=int(os.environ.get('FRESHNESS_TIMEOUT_SECONDS', 60))): + delta_t = 5 + total_time=0 + done = False + while not done: + print(f'Waiting for namespace "{namespace}" to have vectors. Total time waited: {total_time} seconds') + stats = idx.describe_index_stats() + if namespace in stats.namespaces and stats.namespaces[namespace].vector_count > 0: + done = True + elif total_time > max_sleep: + raise TimeoutError(f'Timed out waiting for namespace {namespace} to have vectors') + else: + total_time += delta_t + time.sleep(delta_t) \ No newline at end of file diff --git a/tests/unit/utils/test_convert_to_list.py b/tests/unit/utils/test_convert_to_list.py index 88efc0e3..384e169f 100644 --- a/tests/unit/utils/test_convert_to_list.py +++ b/tests/unit/utils/test_convert_to_list.py @@ -1,5 +1,6 @@ import pytest from pinecone.utils import convert_to_list +from pinecone import SparseValues import numpy as np import pandas as pd