-
Notifications
You must be signed in to change notification settings - Fork 84
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Fix] Fetch when vector id string contains spaces (#372)
## Problem

Some data operations fail when the vector id string contains a space.

```python
from pinecone import Pinecone
pc = Pinecone()
pc.fetch(ids=["id with string"])  # no results returned, even when vector exists
```

## Solution

The problem occurred because spaces were being encoded as `+` instead of `%20` in URL query params. The fix was a small adjustment to our code generation templates. I added test coverage for upsert / query / fetch with various weird ids to make sure the change in encoding hasn't broken any other use cases that could pop up.

## Type of Change

- [x] Bug fix (non-breaking change which fixes an issue)
- Loading branch information
Showing
6 changed files
with
146 additions
and
9 deletions.
There are no files selected for viewing
Submodule apis
updated
from e9b47c to 062b11
Submodule python-oas-templates
updated
from b72bd5 to 7e6d01
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import pytest | ||
from .seed import weird_valid_ids, weird_invalid_ids | ||
|
||
|
||
class TestHandlingOfWeirdIds:
    """Integration tests for vector ids that contain unusual characters.

    Every test receives ``idx`` and ``weird_ids_namespace`` as pytest fixtures
    defined outside this file. NOTE(review): the assertions below presume the
    namespace was seeded with one 2-dimensional, metadata-free vector per id
    returned by ``weird_valid_ids()`` -- confirm against the seed/fixture code.
    """

    def test_fetch_weird_ids(self, idx, weird_ids_namespace):
        """Fetch all valid weird ids in batches and check every returned vector."""
        weird_ids = weird_valid_ids()
        batch_size = 100
        for start in range(0, len(weird_ids), batch_size):
            ids_to_fetch = weird_ids[start : start + batch_size]
            results = idx.fetch(ids=ids_to_fetch, namespace=weird_ids_namespace)
            assert results.usage["read_units"] > 0
            assert len(results.vectors) == len(ids_to_fetch)
            for vector_id in ids_to_fetch:
                # The response is keyed by id, and each entry must echo the
                # same id back unchanged.
                assert vector_id in results.vectors
                vector = results.vectors[vector_id]
                assert vector.id == vector_id
                assert vector.metadata is None
                assert vector.values is not None
                assert len(vector.values) == 2

    @pytest.mark.parametrize("id_to_query", weird_valid_ids())
    def test_query_weird_ids(self, idx, weird_ids_namespace, id_to_query):
        """Query by each valid weird id and check the shape of the top match."""
        results = idx.query(
            id=id_to_query,
            top_k=10,
            namespace=weird_ids_namespace,
            include_values=True,
        )
        assert results.usage["read_units"] > 0
        assert len(results.matches) == 10
        assert results.namespace == weird_ids_namespace
        top_match = results.matches[0]
        assert top_match.id is not None
        assert top_match.metadata is None
        assert top_match.values is not None
        assert len(top_match.values) == 2

    def test_list_weird_ids(self, idx, weird_ids_namespace):
        """List the namespace and check every listed id is a known weird id."""
        expected_ids = set(weird_valid_ids())
        listed_count = 0
        for page in idx.list(namespace=weird_ids_namespace):
            for vector_id in page:
                assert vector_id in expected_ids
                listed_count += 1
        # Without this check the test would pass vacuously if the listing
        # returned no pages or only empty pages.
        assert listed_count > 0, "list() returned no ids for the weird-ids namespace"

    @pytest.mark.parametrize("id_to_upsert", weird_invalid_ids())
    def test_weird_invalid_ids(self, idx, weird_ids_namespace, id_to_upsert):
        """Upserting an invalid id must fail with the ASCII-only error message."""
        with pytest.raises(Exception) as e:
            idx.upsert(vectors=[(id_to_upsert, [0.1, 0.1])], namespace=weird_ids_namespace)
        # Must sit outside the ``with`` block: statements after the raising
        # call inside the block would never execute.
        assert "Vector ID must be ASCII" in str(e.value)

    def test_null_character(self, idx, weird_ids_namespace):
        """Upserting an id consisting of a null character must be rejected."""
        with pytest.raises(Exception) as e:
            idx.upsert(vectors=[("\0", [0.1, 0.1])], namespace=weird_ids_namespace)

        assert "Vector ID must not contain null character" in str(e.value)