Skip to content

Commit

Permalink
don't re-embed existing docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmoffet committed Oct 31, 2024
1 parent 2c418bf commit 0178385
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,5 @@

8. **Set up pipelines to access models via API**:

- Once you're in, you should see the four default models available in the chat. If not, check that the pipelines server is running on 9099 and in the UI click on your user in the lower left > Admin Panel > Settings > Connections > OpenAI API section. Set the API URL to http://localhost:9099 and the API key to 0p3n-w3bu! and hit refresh to see if it connects to the pipeline server.
- Once you're in, you should see the four default models available in the chat. If not, check that the pipelines server is running on 9099 and in the UI click on your user in the lower left > Admin Panel > Settings > Connections > OpenAI API section. Set the API URL to <http://localhost:9099> and the API key to 0p3n-w3bu! and hit refresh to see if it connects to the pipeline server.
- After completing these steps, the models specified in the pipeline settings should be available in the drop down at the upper left when you create a new conversation.
23 changes: 21 additions & 2 deletions backend/apps/rag/clients/vector_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import hashlib
import json
from chromadb.utils.batch_utils import create_batches
from redisvl.query import VectorQuery
from redisvl.query import VectorQuery, FilterQuery
from redisvl.index import AsyncSearchIndex
from redisvl.query.filter import Tag
import logging
Expand Down Expand Up @@ -71,7 +71,10 @@ def create_collection(
def get_collection(self, name: str) -> "VectorCollection":
    """Retrieve a handle to an existing vector collection.

    Args:
        name: Name of the collection to wrap.

    Returns:
        A ``VectorCollection`` bound to this client for every backend.
        Previously the chroma branch fell through and returned ``None``,
        which broke callers that immediately call methods on the result.

    Raises:
        Whatever ``chroma_client.get_collection`` raises for the chroma
        backend (presumably when the collection is missing -- TODO confirm
        the exact exception type against the installed chromadb version).
    """
    # TODO: check if collection exists for redis, meaning entries with
    # the collection tag.
    if self.backend == "chroma":
        # Called only for its lookup side effect: lets chroma surface an
        # unknown collection before a wrapper is handed out.
        self.chroma_client.get_collection(name=name)
    return VectorCollection(name=name, vector_client=self)

def delete_collection(self, name: str):
"""Delete a vector collection. For Redis, handled via collection tag."""
Expand Down Expand Up @@ -216,6 +219,22 @@ async def get(
[await self.collection.fetch(doc_id) for doc_id in ids] if ids else []
)

async def get_one(self) -> List[Dict[str, Any]]:
    """Fetch at most one entry tagged with this collection's name.

    Serves as a cheap existence probe: a non-``None`` result means the
    collection already holds at least one entry.

    Returns:
        The non-empty query result (restricted to the ``doc_id`` field and
        a single hit) when an entry exists, otherwise ``None``.

    Raises:
        NotImplementedError: For the chroma backend, which has no
            implementation yet.
    """
    backend = self.vector_client.backend
    if backend == "chroma":
        # Raise rather than return the exception class: a returned class is
        # not None, so callers would mistake it for "collection exists".
        raise NotImplementedError(
            "get_one is not implemented for the chroma backend"
        )
    if backend == "redis":
        # Match only entries whose "collection" tag equals this collection.
        filter_query = FilterQuery(
            filter_expression=Tag("collection") == self.name,
            return_fields=["doc_id"],
            num_results=1,
        )
        collection_entries = await self.collection._query(filter_query)
        if collection_entries:
            return collection_entries
    return None

async def delete(self, ids: List[str]):
"""Delete documents by their IDs."""
if self.vector_client.backend == "chroma":
Expand Down
14 changes: 14 additions & 0 deletions backend/apps/rag/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,13 @@ async def store_data_in_vector_db(
data, collection_name, overwrite: bool = False
) -> bool:

# check for collection
collection = VECTOR_CLIENT.get_collection(collection_name)
result = await collection.get_one()
if result is not None and not overwrite:
log.info(f"collection already exists for name: {collection_name}")
return True

text_splitter = RecursiveCharacterTextSplitter(
chunk_size=app.state.config.CHUNK_SIZE,
chunk_overlap=app.state.config.CHUNK_OVERLAP,
Expand All @@ -915,6 +922,13 @@ async def store_data_in_vector_db(
async def store_text_in_vector_db(
text, metadata, collection_name, overwrite: bool = False
) -> bool:

# check for collection
collection = VECTOR_CLIENT.get_collection(collection_name)
if collection is not None and not overwrite:
log.info(f"collection already exists for name: {collection_name}")
return True

text_splitter = RecursiveCharacterTextSplitter(
chunk_size=app.state.config.CHUNK_SIZE,
chunk_overlap=app.state.config.CHUNK_OVERLAP,
Expand Down

0 comments on commit 0178385

Please sign in to comment.