Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
jhamon committed Jan 10, 2024
1 parent 1fc6d47 commit 29f5221
Show file tree
Hide file tree
Showing 13 changed files with 298 additions and 51 deletions.
4 changes: 2 additions & 2 deletions .github/actions/test-data-plane/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ inputs:
description: 'Whether to use gRPC or REST'
required: false
default: 'true'
freshness_sleep_seconds:
freshness_timeout_seconds:
description: 'The number of seconds to wait for the index to become fresh'
required: false
default: '60'
Expand Down Expand Up @@ -54,4 +54,4 @@ runs:
USE_GRPC: ${{ inputs.use_grpc }}
METRIC: ${{ inputs.metric }}
SPEC: ${{ inputs.spec }}
FRESHNESS_SLEEP_SECONDS: ${{ inputs.freshness_sleep_seconds }}
FRESHNESS_TIMEOUT_SECONDS: ${{ inputs.freshness_timeout_seconds }}
5 changes: 4 additions & 1 deletion pinecone/data/vector_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ def _tuple_to_vector(item, check_type: bool) -> Vector:
if len(item) < 2 or len(item) > 3:
raise VectorTupleLengthError(item)
id, values, metadata = fix_tuple_length(item, 3)
return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type)
if isinstance(values, SparseValues):
raise ValueError("Sparse values are not supported in tuples. Please use either dicts or a Vector objects as inputs.")
else:
return Vector(id=id, values=convert_to_list(values), metadata=metadata or {}, _check_type=check_type)

@staticmethod
def _dict_to_vector(item, check_type: bool) -> Vector:
Expand Down
2 changes: 1 addition & 1 deletion pinecone/utils/convert_to_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ def convert_to_list(obj):

if class_name == 'list':
return obj
elif hasattr(obj, 'tolist'):
elif hasattr(obj, 'tolist') and callable(getattr(obj, 'tolist')):
return obj.tolist()
else:
return list(obj)
Empty file.
Empty file.
32 changes: 20 additions & 12 deletions tests/integration/data/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
# - environment: free vs paid
# - with metadata vs without metadata

@pytest.fixture
@pytest.fixture(scope='session')
def api_key():
    """Return the value that ``get_environment_var`` yields for 'PINECONE_API_KEY'."""
    key = get_environment_var('PINECONE_API_KEY')
    return key

@pytest.fixture
@pytest.fixture(scope='session')
def client(api_key):
use_grpc = os.environ.get('USE_GRPC', 'false') == 'true'
if use_grpc:
Expand All @@ -25,27 +25,35 @@ def client(api_key):
from pinecone import Pinecone
return Pinecone(api_key=api_key)

@pytest.fixture
@pytest.fixture(scope='session')
def metric():
    """Return the metric looked up under 'METRIC' via ``get_environment_var``.

    'cosine' is passed as the second argument (presumably the fallback used
    when the variable is unset -- confirm against the helper).
    """
    selected_metric = get_environment_var('METRIC', 'cosine')
    return selected_metric

@pytest.fixture
@pytest.fixture(scope='session')
def spec():
    """Return the index spec parsed from the JSON text stored under 'SPEC'."""
    raw_spec = get_environment_var('SPEC')
    return json.loads(raw_spec)

@pytest.fixture
@pytest.fixture(scope='session')
def index_name():
    """Return an index name made of 'dataplane-' followed by ``random_string(20)``."""
    suffix = random_string(20)
    return 'dataplane-' + suffix

@pytest.fixture(scope='session')
def index_host(client, index_name, metric, spec):
    """Create the index for the test session and return its host.

    Calls ``client.create_index`` with a fixed dimension of 2 together with
    the given name, metric and spec, then reads the ``host`` attribute from
    the result of ``client.describe_index``.
    """
    # NOTE(review): no teardown is visible in this fixture -- confirm the
    # index is deleted somewhere else once the session ends.
    client.create_index(name=index_name, dimension=2, metric=metric, spec=spec)
    return client.describe_index(name=index_name).host

# The namespace fixture is deliberately function-scoped rather than session-scoped:
# each test gets its own random namespace, which avoids collisions between tests.
@pytest.fixture
def namespace():
    """Return a fresh ``random_string(10)`` value to use as a namespace name."""
    name = random_string(10)
    return name

@pytest.fixture
def index_host(client, index_name, metric, spec):
client.create_index(name=index_name, dimension=2, metric=metric, spec=spec)
description = client.describe_index(name=index_name)
return description.host

def sleep_t():
return int(os.environ.get('FRESHNESS_SLEEP_SECONDS', 60))
def idx(client, index_name, index_host):
    """Return the object built by ``client.Index`` for the given name and host."""
    index = client.Index(
        name=index_name,
        host=index_host,
    )
    return index
Empty file.
101 changes: 66 additions & 35 deletions tests/integration/data/cosine/test_upsert.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,85 @@
import pytest
import time
from pinecone import Vector
import os
import random
from pinecone import Vector, SparseValues
from ...helpers import poll_stats_for_namespace

def test_upsert_to_default_namespace(client, index_name, sleep_t):
expected_dimension = 2
desc = client.describe_index(index_name)
assert desc.dimension == expected_dimension
assert desc.metric == 'cosine'

idx = client.Index(index_name)
@pytest.mark.parametrize('use_nondefault_namespace', [True, False])
def test_upsert_to_namespace(
idx,
namespace,
use_nondefault_namespace
):
target_namespace = namespace if use_nondefault_namespace else ''

# Upsert with tuples
idx.upsert(vectors=[
('1', [1.0, 2.0]),
('2', [3.0, 4.0]),
('3', [5.0, 6.0])
])
('1', embedding_values()),
('2', embedding_values()),
('3', embedding_values())
],
namespace=target_namespace
)

# Upsert with objects
idx.upsert(vectors=[
Vector('4', [7.0, 8.0]),
Vector('5', [9.0, 10.0]),
Vector('6', [11.0, 12.0])
])
Vector(id='4', values=embedding_values()),
Vector(id='5', values=embedding_values()),
Vector(id='6', values=embedding_values())
],
namespace=target_namespace
)

# Upsert with dict
idx.upsert(vectors=[
{'id': '7', 'values': [13.0, 14.0]},
{'id': '8', 'values': [15.0, 16.0]},
{'id': '9', 'values': [17.0, 18.0]}
])
{'id': '7', 'values': embedding_values()},
{'id': '8', 'values': embedding_values()},
{'id': '9', 'values': embedding_values()}
],
namespace=target_namespace
)

time.sleep(sleep_t)
poll_stats_for_namespace(idx, target_namespace)

# Check the vector count reflects some data has been upserted
stats = idx.describe_index_stats()
assert stats.vector_count == 9

assert stats.total_vector_count >= 9
assert stats.namespaces[target_namespace].vector_count == 9

def test_upsert_to_custom_namespace(client, index_name, namespace):
expected_dimension = 2
assert client.describe_index(index_name).dimension == expected_dimension
@pytest.mark.parametrize('use_nondefault_namespace', [True, False])
@pytest.mark.skipif(os.getenv('METRIC') != 'dotproduct', reason='Only metric=dotprodouct indexes support hybrid')
def test_upsert_to_namespace_with_sparse_embedding_values(
idx,
namespace,
use_nondefault_namespace
):
target_namespace = namespace if use_nondefault_namespace else ''

idx = client.Index(index_name)

# Upsert with tuples
# Upsert with sparse values object
idx.upsert(vectors=[
('1', [1.0, 2.0]),
('2', [3.0, 4.0]),
('3', [5.0, 6.0])
],
namespace=namespace
Vector(
id='1',
sparse_values=SparseValues(
indices=[0,1],
values=embedding_values()
)
),
],
namespace=target_namespace
)

# Upsert with sparse values dict
idx.upsert(vectors=[
{'id': '2', 'sparse_values': {'indices': [0,1], 'values': embedding_values()}},
{'id': '3', 'sparse_values': {'indices': [0,1], 'values': embedding_values()}}
],
namespace=target_namespace
)

poll_stats_for_namespace(idx, target_namespace)

# Check the vector count reflects some data has been upserted
stats = idx.describe_index_stats()
assert stats.total_vector_count >= 9
assert stats.namespaces[target_namespace].vector_count == 9

Loading

0 comments on commit 29f5221

Please sign in to comment.