Skip to content

Commit

Permalink
feat(api): adding chunking_strategy to polling helpers
Browse files: browse the repository at this point in the history
  • Loading branch information
pstern-sl committed Jun 7, 2024
1 parent 178afa0 commit db4e1f7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
8 changes: 8 additions & 0 deletions src/openai/resources/beta/vector_stores/file_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,13 @@ def create_and_poll(
*,
file_ids: List[str],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Create a vector store batch and poll until all files have been processed."""
batch = self.create(
vector_store_id=vector_store_id,
file_ids=file_ids,
chunking_strategy=chunking_strategy,
)
# TODO: don't poll unless necessary??
return self.poll(
Expand Down Expand Up @@ -306,6 +308,7 @@ def upload_and_poll(
max_concurrency: int = 5,
file_ids: List[str] = [],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Uploads the given files concurrently and then creates a vector store file batch.
Expand Down Expand Up @@ -343,6 +346,7 @@ def upload_and_poll(
vector_store_id=vector_store_id,
file_ids=[*file_ids, *(f.id for f in results)],
poll_interval_ms=poll_interval_ms,
chunking_strategy=chunking_strategy,
)
return batch

Expand Down Expand Up @@ -488,11 +492,13 @@ async def create_and_poll(
*,
file_ids: List[str],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Create a vector store batch and poll until all files have been processed."""
batch = await self.create(
vector_store_id=vector_store_id,
file_ids=file_ids,
chunking_strategy=chunking_strategy,
)
# TODO: don't poll unless necessary??
return await self.poll(
Expand Down Expand Up @@ -620,6 +626,7 @@ async def upload_and_poll(
max_concurrency: int = 5,
file_ids: List[str] = [],
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFileBatch:
"""Uploads the given files concurrently and then creates a vector store file batch.
Expand Down Expand Up @@ -680,6 +687,7 @@ async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> No
vector_store_id=vector_store_id,
file_ids=[*file_ids, *(f.id for f in uploaded_files)],
poll_interval_ms=poll_interval_ms,
chunking_strategy=chunking_strategy,
)
return batch

Expand Down
16 changes: 12 additions & 4 deletions src/openai/resources/beta/vector_stores/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,10 @@ def create_and_poll(
*,
vector_store_id: str,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Attach a file to the given vector store and wait for it to be processed."""
self.create(vector_store_id=vector_store_id, file_id=file_id)
self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)

return self.poll(
file_id,
Expand Down Expand Up @@ -301,27 +302,30 @@ def upload(
*,
vector_store_id: str,
file: FileTypes,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Upload a file to the `files` API and then attach it to the given vector store.
Note the file will be asynchronously processed (you can use the alternative
polling helper method to wait for processing to complete).
"""
file_obj = self._client.files.create(file=file, purpose="assistants")
return self.create(vector_store_id=vector_store_id, file_id=file_obj.id)
return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)

def upload_and_poll(
    self,
    *,
    vector_store_id: str,
    file: FileTypes,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
    """Upload `file`, attach it to the vector store, and poll until it is processed.

    The file is first created through the client's `files` API with purpose
    "assistants"; its id is then handed to `create_and_poll` together with
    the given `chunking_strategy` and `poll_interval_ms`.
    """
    # Step 1: push the raw file to the `files` API and keep only its id.
    uploaded_id = self._client.files.create(file=file, purpose="assistants").id

    # Step 2: attach the uploaded file and wait for processing to finish.
    vector_store_file = self.create_and_poll(
        file_id=uploaded_id,
        vector_store_id=vector_store_id,
        poll_interval_ms=poll_interval_ms,
        chunking_strategy=chunking_strategy,
    )
    return vector_store_file

Expand Down Expand Up @@ -542,9 +546,10 @@ async def create_and_poll(
*,
vector_store_id: str,
poll_interval_ms: int | NotGiven = NOT_GIVEN,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Attach a file to the given vector store and wait for it to be processed."""
await self.create(vector_store_id=vector_store_id, file_id=file_id)
await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy)

return await self.poll(
file_id,
Expand Down Expand Up @@ -598,28 +603,31 @@ async def upload(
*,
vector_store_id: str,
file: FileTypes,
chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
"""Upload a file to the `files` API and then attach it to the given vector store.
Note the file will be asynchronously processed (you can use the alternative
polling helper method to wait for processing to complete).
"""
file_obj = await self._client.files.create(file=file, purpose="assistants")
return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id)
return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy)

async def upload_and_poll(
    self,
    *,
    vector_store_id: str,
    file: FileTypes,
    poll_interval_ms: int | NotGiven = NOT_GIVEN,
    chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN,
) -> VectorStoreFile:
    """Upload `file`, attach it to the vector store, and poll until it is processed.

    The file is first created through the client's `files` API with purpose
    "assistants"; its id is then handed to `create_and_poll` together with
    the given `chunking_strategy` and `poll_interval_ms`.
    """
    # Step 1: push the raw file to the `files` API and keep only its id.
    uploaded = await self._client.files.create(file=file, purpose="assistants")
    uploaded_id = uploaded.id

    # Step 2: attach the uploaded file and wait for processing to finish.
    vector_store_file = await self.create_and_poll(
        file_id=uploaded_id,
        vector_store_id=vector_store_id,
        poll_interval_ms=poll_interval_ms,
        chunking_strategy=chunking_strategy,
    )
    return vector_store_file


Expand Down

0 comments on commit db4e1f7

Please sign in to comment.