Skip to content

Commit a330b63

Browse files
[ce] repository control plane and data plane (#520)
## Problem

First-pass implementation of the control plane SDK for repositories. Also includes a basic data plane implementation that avoids using the OpenAPI-generated types, because `Document` is such an open-ended type. More work is needed here.

```python
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

repo_name = "aryn-parsed-example-8"
repo_model = pc.create_repository(
    name=f"{repo_name}",
    spec={
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1",
        },
    },
    schema={
        "fields": {
            "$.parsed[*].text_representation": {
                "description": "Text representation of the chunk",
                "embedded": True,
            },
        },
    },
    timeout=300,
)

repo_model = pc.describe_repository(name=repo_name)

# data plane
repo = pc.Repository(host=repo_model.host)

document = get_document()
response = repo.upsert(
    namespace="foo",
    document=document,
    echo=True,
)

pc.delete_repository(name=repo_name, timeout=300)
```

## Solution

Describe the approach you took. Link to any relevant bugs, issues, docs, or other resources.

## Type of Change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
- [ ] Infrastructure change (CI configs, etc)
- [ ] Non-code change (docs, etc)
- [ ] None of the above: (explain here)

## Test Plan

Describe specific steps for validating this change.

---------

Co-authored-by: rohanshah18 <rohan.s@pinecone.io>
1 parent f66b42b commit a330b63

18 files changed

+1044
-18
lines changed

pinecone/pinecone.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
if TYPE_CHECKING:
1717
from pinecone.config import Config, OpenApiConfiguration
1818
from pinecone.db_data import _Index as Index, _IndexAsyncio as IndexAsyncio
19+
from pinecone.repository_data import _Repository as Repository
1920
from pinecone.db_control.index_host_store import IndexHostStore
2021
from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
2122
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
@@ -42,6 +43,7 @@
4243
RestoreJobModel,
4344
RestoreJobList,
4445
)
46+
from pinecone.repository_control.models import RepositoryModel, RepositoryList, DocumentSchema
4547

4648

4749
class Pinecone(PluginAware, LegacyPineconeDBControlInterface):
@@ -241,6 +243,9 @@ def __init__(
241243
self._db_control = None # Lazy initialization
242244
""" :meta private: """
243245

246+
self._repository_control = None # Lazy initialization
247+
""" :meta private: """
248+
244249
super().__init__() # Initialize PluginAware
245250

246251
@property
@@ -273,6 +278,21 @@ def db(self):
273278
)
274279
return self._db_control
275280

281+
@property
def repository_ctrl(self):
    """
    Lazily created, cached `pinecone.repository_control.RepositoryControl` namespace.

    The first access imports `RepositoryControl` and builds it from this client's
    config, OpenAPI config and pool thread count; every later access returns the
    same cached instance.
    """
    cached = self._repository_control
    if cached is not None:
        return cached

    # Imported on first use rather than when this module is loaded.
    from pinecone.repository_control.repository_control import RepositoryControl

    cached = RepositoryControl(
        config=self._config,
        openapi_config=self._openapi_config,
        pool_threads=self._pool_threads,
    )
    self._repository_control = cached
    return cached
295+
276296
@property
277297
def index_host_store(self) -> "IndexHostStore":
278298
""":meta private:"""
@@ -460,6 +480,26 @@ def list_restore_jobs(
460480
def describe_restore_job(self, *, job_id: str) -> "RestoreJobModel":
461481
return self.db.restore_job.describe(job_id=job_id)
462482

483+
def create_repository(
    self,
    name: str,
    spec: Union[Dict, "ServerlessSpec"],
    schema: Union[Dict, "DocumentSchema"],
    timeout: Optional[int] = None,
) -> "RepositoryModel":
    """
    Create a repository through the repository control plane.

    Every argument is forwarded unchanged, by keyword, to
    `self.repository_ctrl.repository.create`.

    :param name: Name of the repository to create.
    :param spec: Deployment spec, given as a dict or a `ServerlessSpec`.
    :param schema: Document schema, given as a dict or a `DocumentSchema`.
    :param timeout: Forwarded as-is; presumably bounds how long to wait for the
        repository to become ready -- confirm against the resource implementation.
    :return: The value returned by the underlying `create` call.
    """
    repositories = self.repository_ctrl.repository
    return repositories.create(name=name, spec=spec, schema=schema, timeout=timeout)
493+
494+
def describe_repository(self, name: str) -> "RepositoryModel":
    """
    Describe a single repository.

    Delegates to `self.repository_ctrl.repository.describe`.

    :param name: Name of the repository to describe.
    :return: The value returned by the underlying `describe` call.
    """
    repositories = self.repository_ctrl.repository
    return repositories.describe(name=name)
496+
497+
def list_repositories(self) -> "RepositoryList":
    """
    List repositories.

    Delegates to `self.repository_ctrl.repository.list`.

    :return: The value returned by the underlying `list` call.
    """
    repositories = self.repository_ctrl.repository
    return repositories.list()
499+
500+
def delete_repository(self, name: str, timeout: Optional[int] = None):
    """
    Delete a repository.

    Delegates to `self.repository_ctrl.repository.delete`.

    :param name: Name of the repository to delete.
    :param timeout: Forwarded as-is; presumably bounds how long to wait for the
        deletion to finish -- confirm against the resource implementation.
    :return: The value returned by the underlying `delete` call.
    """
    repositories = self.repository_ctrl.repository
    return repositories.delete(name=name, timeout=timeout)
502+
463503
@staticmethod
464504
def from_texts(*args, **kwargs):
465505
""":meta private:"""
@@ -518,6 +558,34 @@ def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio":
518558
**kwargs,
519559
)
520560

561+
def Repository(self, name: str = "", host: str = "", **kwargs) -> "Repository":
    """
    Build a data plane client for one repository.

    At least one of ``name`` and ``host`` must be given. A non-empty ``host`` is
    checked with ``check_realistic_host`` and normalized with ``normalize_host``;
    otherwise the host is looked up from ``name`` through the repository control
    plane.

    :param name: Repository name; only used for the host lookup when ``host`` is empty.
    :param host: Repository host; takes precedence over ``name`` when non-empty.
    :param kwargs: Extra keyword arguments passed through to ``_Repository``. A
        truthy ``pool_threads`` entry overrides this client's own setting.
    :raises ValueError: If both ``name`` and ``host`` are empty strings.
    :return: A ``_Repository`` bound to the resolved host.
    """
    from pinecone.repository_data import _Repository

    if name == "" and host == "":
        raise ValueError("Either name or host must be specified")

    # A missing or falsy `pool_threads` falls back to the client-level value.
    requested_threads = kwargs.pop("pool_threads", None)
    pool_threads = requested_threads if requested_threads else self._pool_threads
    api_key = self._config.api_key
    openapi_config = self._openapi_config

    if host == "":
        # No host given: resolve it from the repository name.
        resolved_host = self.repository_ctrl.repository._get_host(name)
    else:
        # An explicit host wins over the name.
        check_realistic_host(host)
        resolved_host = normalize_host(host)

    return _Repository(
        host=resolved_host,
        api_key=api_key,
        pool_threads=pool_threads,
        openapi_config=openapi_config,
        source_tag=self.config.source_tag,
        **kwargs,
    )
588+
521589

522590
def check_realistic_host(host: str) -> None:
523591
""":meta private:
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from .models import *
2+
from .repository_control import RepositoryControl
3+
from pinecone.db_control.enums import *
4+
5+
# Names re-exported by `from pinecone.repository_control import *`.
__all__ = [
    # from pinecone.db_control.enums
    "CloudProvider",
    "AwsRegion",
    "GcpRegion",
    "AzureRegion",
    # from .models
    # `DocumentSchema` is listed so this export list matches `.models.__all__`.
    "DocumentSchema",
    "ServerlessSpec",
    "ServerlessSpecDefinition",
    "RepositoryList",
    "RepositoryModel",
    # direct imports
    "RepositoryControl",
]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from .document_schema import DocumentSchema
2+
from .repository_description import ServerlessSpecDefinition
3+
from .repository_list import RepositoryList
4+
from .repository_model import RepositoryModel
5+
from .serverless_spec import ServerlessSpec
6+
7+
8+
__all__ = [
9+
"DocumentSchema",
10+
"ServerlessSpec",
11+
"ServerlessSpecDefinition",
12+
"RepositoryList",
13+
"RepositoryModel",
14+
]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from pinecone.core.openapi.repository_control.model.document_schema import (
2+
DocumentSchema as OpenAPIDocumentSchema,
3+
)
4+
import json
5+
6+
7+
class DocumentSchema:
    """
    Thin wrapper around the OpenAPI-generated ``DocumentSchema`` model.

    Attribute access (``obj.attr``) that the wrapper cannot satisfy itself, and all
    item access (``obj["attr"]``), is forwarded to the wrapped model.
    """

    def __init__(self, schema: "OpenAPIDocumentSchema"):
        self.schema = schema

    def __str__(self):
        return str(self.schema)

    def __getattr__(self, attr):
        """
        Forward unknown attributes to the wrapped model.

        :raises AttributeError: If the wrapped model is not set yet, or if it does
            not have ``attr``.
        """
        # Only reached when normal lookup fails. The wrapped model is read straight
        # from the instance dict: going through `self.schema` would re-enter this
        # method and recurse without bound whenever `schema` is not set, e.g. on an
        # instance that `copy` or `pickle` created without calling `__init__`.
        try:
            wrapped = self.__dict__["schema"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            ) from None
        return getattr(wrapped, attr)

    def __getitem__(self, key):
        # Item access always goes to the wrapped model, never to wrapper attributes.
        return self.__getattr__(key)

    def __repr__(self):
        return json.dumps(self.to_dict(), indent=4)

    def to_dict(self):
        """Return the wrapped model's ``to_dict()`` result."""
        return self.schema.to_dict()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from typing import NamedTuple, Dict, Literal
2+
3+
4+
class ServerlessSpecDefinition(NamedTuple):
    """Cloud and region pair of a serverless spec."""

    cloud: str
    region: str


# The single key allowed in the mapping form of a serverless spec.
ServerlessKey = Literal["serverless"]

# Mapping form of a spec: {"serverless": ServerlessSpecDefinition(...)}.
# NOTE(review): `.serverless_spec` defines a dataclass that is also named
# `ServerlessSpec`, and the package `__init__` exports that dataclass rather
# than this alias -- confirm the shared name is intentional.
ServerlessSpec = Dict[ServerlessKey, ServerlessSpecDefinition]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import json
2+
from pinecone.core.openapi.repository_control.model.repository_list import (
3+
RepositoryList as OpenAPIRepositoryList,
4+
)
5+
from .repository_model import RepositoryModel
6+
from typing import List
7+
8+
9+
class RepositoryList:
    """
    List-like wrapper around the OpenAPI-generated ``RepositoryList`` model.

    Each entry of the wrapped model's ``repositories`` is wrapped in a
    ``RepositoryModel``. Indexing, ``len()`` and iteration operate on those
    wrappers; attributes the wrapper does not define are forwarded to the
    wrapped model.
    """

    def __init__(self, repository_list: "OpenAPIRepositoryList"):
        self.repository_list = repository_list
        self.repositories = [RepositoryModel(i) for i in self.repository_list.repositories]
        # Not read anywhere in this class; kept because it is a public attribute.
        self.current = 0

    def names(self) -> List[str]:
        """Return the ``name`` of every repository, in list order."""
        return [i.name for i in self.repositories]

    def __getitem__(self, key):
        return self.repositories[key]

    def __len__(self):
        return len(self.repositories)

    def __iter__(self):
        return iter(self.repositories)

    def __str__(self):
        return str(self.repositories)

    def __repr__(self):
        return json.dumps([i.to_dict() for i in self.repositories], indent=4)

    def __getattr__(self, attr):
        """
        Forward unknown attributes to the wrapped model.

        :raises AttributeError: If the wrapped model is not set yet, or if it does
            not have ``attr``.
        """
        # Only reached when normal lookup fails. The wrapped model is read straight
        # from the instance dict: going through `self.repository_list` would
        # re-enter this method and recurse without bound whenever that attribute is
        # not set, e.g. on an instance that `copy` or `pickle` created without
        # calling `__init__`.
        try:
            wrapped = self.__dict__["repository_list"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            ) from None
        return getattr(wrapped, attr)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from pinecone.core.openapi.repository_control.model.repository_model import (
2+
RepositoryModel as OpenAPIRepositoryModel,
3+
)
4+
import json
5+
6+
7+
class RepositoryModel:
    """
    Thin wrapper around the OpenAPI-generated ``RepositoryModel``.

    Attribute access (``obj.attr``) that the wrapper cannot satisfy itself, and all
    item access (``obj["attr"]``), is forwarded to the wrapped model.
    """

    def __init__(self, repository: "OpenAPIRepositoryModel"):
        self.repository = repository

    def __str__(self):
        return str(self.repository)

    def __getattr__(self, attr):
        """
        Forward unknown attributes to the wrapped model.

        :raises AttributeError: If the wrapped model is not set yet, or if it does
            not have ``attr``.
        """
        # Only reached when normal lookup fails. The wrapped model is read straight
        # from the instance dict: going through `self.repository` would re-enter
        # this method and recurse without bound whenever `repository` is not set,
        # e.g. on an instance that `copy` or `pickle` created without calling
        # `__init__`.
        try:
            wrapped = self.__dict__["repository"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            ) from None
        return getattr(wrapped, attr)

    def __getitem__(self, key):
        # Item access always goes to the wrapped model, never to wrapper attributes.
        return self.__getattr__(key)

    def __repr__(self):
        return json.dumps(self.to_dict(), indent=4)

    def to_dict(self):
        """Return the wrapped model's ``to_dict()`` result."""
        return self.repository.to_dict()
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from dataclasses import dataclass
2+
from typing import Union
3+
from enum import Enum
4+
5+
from pinecone.db_control.enums import CloudProvider, AwsRegion, GcpRegion, AzureRegion
6+
7+
8+
@dataclass(frozen=True)
class ServerlessSpec:
    """
    Immutable cloud/region pair for a serverless repository.

    Both fields are stored as plain strings: an ``Enum`` member is replaced by its
    ``value`` and anything else is passed through ``str()``.
    """

    cloud: str
    region: str

    def __init__(
        self,
        cloud: Union[CloudProvider, str],
        region: Union[AwsRegion, GcpRegion, AzureRegion, str],
    ):
        # The dataclass is frozen, so fields are written with `object.__setattr__`
        # instead of ordinary assignment.
        for field_name, raw in (("cloud", cloud), ("region", region)):
            plain = raw.value if isinstance(raw, Enum) else str(raw)
            object.__setattr__(self, field_name, plain)

    def asdict(self):
        """Return the spec as ``{"serverless": {"cloud": ..., "region": ...}}``."""
        inner = {"cloud": self.cloud, "region": self.region}
        return {"serverless": inner}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import logging
2+
from typing import Optional, TYPE_CHECKING
3+
4+
from pinecone.core.openapi.repository_control.api.manage_repositories_api import (
5+
ManageRepositoriesApi,
6+
)
7+
from pinecone.openapi_support.api_client import ApiClient
8+
9+
from pinecone.utils import setup_openapi_client, PluginAware
10+
from pinecone.core.openapi.repository_control import API_VERSION
11+
12+
13+
logger = logging.getLogger(__name__)
14+
""" :meta private: """
15+
16+
if TYPE_CHECKING:
17+
from .resources.sync.repository import RepositoryResource
18+
from pinecone.config import Config, OpenApiConfiguration
19+
20+
21+
class RepositoryControl(PluginAware):
    """
    Entry point for repository control plane operations.

    Construction builds a `ManageRepositoriesApi` client with
    `setup_openapi_client`; the `RepositoryResource` that wraps it is created
    lazily by the `repository` property.
    """

    def __init__(
        self, config: "Config", openapi_config: "OpenApiConfiguration", pool_threads: int
    ) -> None:
        self.config = config
        """ :meta private: """

        self._openapi_config = openapi_config
        """ :meta private: """

        self._pool_threads = pool_threads
        """ :meta private: """

        self._repository_api = setup_openapi_client(
            api_client_klass=ApiClient,
            api_klass=ManageRepositoriesApi,
            config=self.config,
            openapi_config=self._openapi_config,
            pool_threads=self._pool_threads,
            api_version=API_VERSION,
        )
        """ :meta private: """

        # Created on first access of the `repository` property.
        self._repository_resource: Optional["RepositoryResource"] = None
        """ :meta private: """

        super().__init__()  # Initialize PluginAware

    @property
    def repository(self) -> "RepositoryResource":
        """Return the `RepositoryResource`, creating and caching it on first access."""
        resource = self._repository_resource
        if resource is not None:
            return resource

        # Imported on first use rather than when this module is loaded.
        from .resources.sync.repository import RepositoryResource

        resource = RepositoryResource(
            repository_api=self._repository_api,
            config=self.config,
            openapi_config=self._openapi_config,
            pool_threads=self._pool_threads,
        )
        self._repository_resource = resource
        return resource
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from typing import Dict
2+
from pinecone.config import Config
3+
from pinecone.core.openapi.repository_control.api.manage_repositories_api import (
4+
ManageRepositoriesApi as RepositoriesOperationsApi,
5+
)
6+
from pinecone.openapi_support.exceptions import PineconeException
7+
from pinecone.utils import normalize_host
8+
9+
10+
class SingletonMeta(type):
    """
    Metaclass that gives each class using it exactly one instance.

    The first call to a class constructs the instance; every later call returns
    that same object and ignores the arguments it was given.
    """

    # One registry shared by every class that uses this metaclass, keyed by the
    # class object itself and holding that class's single instance.
    _instances: Dict[type, object] = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
18+
19+
20+
class RepositoryHostStore(metaclass=SingletonMeta):
    """
    Singleton (via `SingletonMeta`) cache of repository hosts.

    Entries are keyed by the API key joined to the repository name with ``:``,
    and hosts are stored in normalized form.
    """

    _repositoryHosts: Dict[str, str]

    def __init__(self) -> None:
        self._repositoryHosts = {}

    def _key(self, config: Config, repository_name: str) -> str:
        """Build the cache key for a repository under the given config's API key."""
        parts = (config.api_key, repository_name)
        return ":".join(parts)

    def delete_host(self, config: Config, repository_name: str):
        """Drop the cached host for a repository; does nothing if none is cached."""
        self._repositoryHosts.pop(self._key(config, repository_name), None)

    def key_exists(self, key: str) -> bool:
        """Return whether ``key`` (as produced by ``_key``) has a cached host."""
        return key in self._repositoryHosts

    def set_host(self, config: Config, repository_name: str, host: str):
        """Cache the normalized ``host`` for a repository; a falsy host is ignored."""
        if not host:
            return
        self._repositoryHosts[self._key(config, repository_name)] = normalize_host(host)

    def get_host(self, api: RepositoriesOperationsApi, config: Config, repository_name: str) -> str:
        """
        Return the host for a repository, describing it through ``api`` on a cache miss.

        :raises PineconeException: If the description carries no usable host, so
            nothing could be cached.
        """
        key = self._key(config, repository_name)
        if not self.key_exists(key):
            # Cache miss: ask the control plane and remember the answer.
            description = api.describe_repository(repository_name)
            self.set_host(config, repository_name, description.host)
            if not self.key_exists(key):
                # `set_host` stores nothing for a falsy host.
                raise PineconeException(
                    f"Could not get host for repository: {repository_name}. Call describe_repository('{repository_name}') to check the current status."
                )
        return self._repositoryHosts[key]

0 commit comments

Comments
 (0)