WIP

pinecone-io · Jan 10, 2024 · 29f5221 · 29f5221
1 parent 1fc6d47
commit 29f5221
Show file tree

Hide file tree

Showing 13 changed files with 298 additions and 51 deletions.
diff --git a/.github/actions/test-data-plane/action.yaml b/.github/actions/test-data-plane/action.yaml
@@ -21,7 +21,7 @@ inputs:
     description: 'Whether to use gRPC or REST'
     required: false
     default: 'true'
-  freshness_sleep_seconds:
+  freshness_timeout_seconds:
     description: 'The number of seconds to wait for the index to become fresh'
     required: false
     default: '60'
@@ -54,4 +54,4 @@ runs:
         USE_GRPC: ${{ inputs.use_grpc }}
         METRIC: ${{ inputs.metric }}
         SPEC: ${{ inputs.spec }}
-        FRESHNESS_SLEEP_SECONDS: ${{ inputs.freshness_sleep_seconds }}
+        FRESHNESS_TIMEOUT_SECONDS: ${{ inputs.freshness_timeout_seconds }}
diff --git a/pinecone/data/vector_factory.py b/pinecone/data/vector_factory.py
@@ -64,7 +64,10 @@ def _tuple_to_vector(item, check_type: bool) -> Vector:
         if len(item) < 2 or len(item) > 3:
             raise VectorTupleLengthError(item)
         id, values, metadata = fix_tuple_length(item, 3)
-        return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type)
+        if isinstance(values, SparseValues):
+            raise ValueError("Sparse values are not supported in tuples. Please use either dicts or a Vector objects as inputs.")
+        else:
+            return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type)
 
     @staticmethod
     def _dict_to_vector(item, check_type: bool) -> Vector:

diff --git a/pinecone/utils/convert_to_list.py b/pinecone/utils/convert_to_list.py
@@ -3,7 +3,7 @@ def convert_to_list(obj):
 
     if class_name == 'list':
         return obj
-    elif hasattr(obj, 'tolist'):
+    elif hasattr(obj, 'tolist') and callable(getattr(obj, 'tolist')):
         return obj.tolist()
     else:
         return list(obj)
diff --git a/tests/integration/control/__init__.py b/tests/integration/control/__init__.py
diff --git a/tests/integration/data/__init__.py b/tests/integration/data/__init__.py
diff --git a/tests/integration/data/conftest.py b/tests/integration/data/conftest.py
@@ -11,11 +11,11 @@
 # - environment: free vs paid
 # - with metadata vs without metadata
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def api_key():
     return get_environment_var('PINECONE_API_KEY')
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def client(api_key):
     use_grpc = os.environ.get('USE_GRPC', 'false') == 'true'
     if use_grpc:
@@ -25,27 +25,35 @@ def client(api_key):
         from pinecone import Pinecone
         return Pinecone(api_key=api_key)
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def metric():
     return get_environment_var('METRIC', 'cosine')
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def spec():
     return json.loads(get_environment_var('SPEC'))
 
-@pytest.fixture
+@pytest.fixture(scope='session')
 def index_name():
     return 'dataplane-' + random_string(20)
 
+@pytest.fixture(scope='session')
+def index_host(client, index_name, metric, spec):
+    client.create_index(
+        name=index_name, 
+        dimension=2, 
+        metric=metric, 
+        spec=spec
+    )
+    description = client.describe_index(name=index_name)
+    return description.host
+
+# Namespaces not scoped to session; each test can have its own namespace
+# to avoid collisions
 @pytest.fixture
 def namespace():
     return random_string(10)
 
 @pytest.fixture
-def index_host(client, index_name, metric, spec):
-    client.create_index(name=index_name, dimension=2, metric=metric, spec=spec)
-    description = client.describe_index(name=index_name)
-    return description.host
-
-def sleep_t():
-    return int(os.environ.get('FRESHNESS_SLEEP_SECONDS', 60))
+def idx(client, index_name, index_host):
+    return client.Index(name=index_name, host=index_host)
diff --git a/tests/integration/data/cosine/__init__.py b/tests/integration/data/cosine/__init__.py
diff --git a/tests/integration/data/cosine/test_upsert.py b/tests/integration/data/cosine/test_upsert.py
@@ -1,54 +1,85 @@
 import pytest
-import time
-from pinecone import Vector
+import os
+import random
+from pinecone import Vector, SparseValues
+from ...helpers import poll_stats_for_namespace
 
-def test_upsert_to_default_namespace(client, index_name, sleep_t):
-    expected_dimension = 2
-    desc = client.describe_index(index_name)
-    assert desc.dimension == expected_dimension
-    assert desc.metric == 'cosine'
-
-    idx = client.Index(index_name)
+@pytest.mark.parametrize('use_nondefault_namespace', [True, False]) 
+def test_upsert_to_namespace(
+    idx, 
+    namespace,
+    use_nondefault_namespace
+):
+    target_namespace = namespace if use_nondefault_namespace else ''
 
     # Upsert with tuples
     idx.upsert(vectors=[
-        ('1', [1.0, 2.0]), 
-        ('2', [3.0, 4.0]),
-        ('3', [5.0, 6.0])
-    ])
+            ('1', embedding_values()), 
+            ('2', embedding_values()),
+            ('3', embedding_values())
+        ], 
+        namespace=target_namespace
+    )
 
     # Upsert with objects
     idx.upsert(vectors=[
-        Vector('4', [7.0, 8.0]),
-        Vector('5', [9.0, 10.0]),
-        Vector('6', [11.0, 12.0])
-    ])
+            Vector(id='4', values=embedding_values()),
+            Vector(id='5', values=embedding_values()),
+            Vector(id='6', values=embedding_values())
+        ], 
+        namespace=target_namespace
+    )
 
     # Upsert with dict
     idx.upsert(vectors=[
-        {'id': '7', 'values': [13.0, 14.0]},
-        {'id': '8', 'values': [15.0, 16.0]},
-        {'id': '9', 'values': [17.0, 18.0]}
-    ])
+            {'id': '7', 'values': embedding_values()},
+            {'id': '8', 'values': embedding_values()},
+            {'id': '9', 'values': embedding_values()}
+        ], 
+        namespace=target_namespace
+    )
 
-    time.sleep(sleep_t)
+    poll_stats_for_namespace(idx, target_namespace)
 
     # Check the vector count reflects some data has been upserted
     stats = idx.describe_index_stats()
-    assert stats.vector_count == 9
-
+    assert stats.total_vector_count >= 9
+    assert stats.namespaces[target_namespace].vector_count == 9
 
-def test_upsert_to_custom_namespace(client, index_name, namespace):
-    expected_dimension = 2
-    assert client.describe_index(index_name).dimension == expected_dimension
+@pytest.mark.parametrize('use_nondefault_namespace', [True, False]) 
+@pytest.mark.skipif(os.getenv('METRIC') != 'dotproduct', reason='Only metric=dotprodouct indexes support hybrid')
+def test_upsert_to_namespace_with_sparse_embedding_values(
+    idx,
+    namespace,
+    use_nondefault_namespace
+):
+    target_namespace = namespace if use_nondefault_namespace else ''
 
-    idx = client.Index(index_name)
-
-    # Upsert with tuples
+    # Upsert one hybrid vector (dense values plus sparse values) built from SDK objects
     idx.upsert(vectors=[
-        ('1', [1.0, 2.0]), 
-        ('2', [3.0, 4.0]),
-        ('3', [5.0, 6.0])
-        ], 
-        namespace=namespace
+            Vector(
+                id='1', values=embedding_values(),
+                sparse_values=SparseValues(
+                    indices=[0,1], 
+                    values=embedding_values()
+                )
+            ),
+        ],
+        namespace=target_namespace
     )
+
+    # Upsert two more hybrid vectors expressed as plain dicts
+    idx.upsert(vectors=[
+            {'id': '2', 'values': embedding_values(), 'sparse_values': {'indices': [0,1], 'values': embedding_values()}},
+            {'id': '3', 'values': embedding_values(), 'sparse_values': {'indices': [0,1], 'values': embedding_values()}}
+        ],
+        namespace=target_namespace
+    )
+
+    poll_stats_for_namespace(idx, target_namespace)
+
+    # Only ids 1-3 are upserted here: a fresh namespace holds 3 vectors, the shared default namespace may hold more
+    stats = idx.describe_index_stats()
+    assert stats.total_vector_count >= 3
+    assert stats.namespaces[target_namespace].vector_count >= 3
+