Skip to content

Qdrant Vectorstore Support #253

@kdcokenny

Description

@kdcokenny

I created a working Qdrant vector store example, in case anyone wants to clean the code up and submit a PR (if one is needed at all). If not, this can simply serve as a starting point for those who want to use Qdrant with AutoGen.

# Create the Qdrant client; the retrieval agent defined later in this example
# reads it as a module-level global.
from qdrant_client import QdrantClient

# NOTE: the "***" values are placeholders -- replace them with your own
# Qdrant endpoint URL and API key before running.
client = QdrantClient(url="***", api_key="***")

# Wrapping RetrieveUserProxyAgent
from typing import Dict, List, Union

from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from litellm import embedding as test_embedding
from qdrant_client.models import SearchRequest, Filter, FieldCondition, MatchText

class QdrantRetrieveUserProxyAgent(RetrieveUserProxyAgent):
    """RetrieveUserProxyAgent variant whose document lookup is served by Qdrant.

    Depends on two module-level names: ``client`` (the ``QdrantClient`` to
    query) and ``test_embedding`` (litellm's ``embedding`` function, used to
    turn query texts into vectors).
    """

    def query_vector_db(
        self,
        query_texts: List[str],
        n_results: int = 10,
        search_string: str = "",
        **kwargs,
    ) -> Dict[str, Union[List[str], List[List[str]]]]:
        """Embed each query text and run one Qdrant search per embedding.

        Args:
            query_texts: Texts to embed; one search request is issued per
                embedding returned for them.
            n_results: Maximum number of points requested per search.
            search_string: If non-empty, only points whose ``page_content``
                payload field matches this text are returned. If empty, no
                filter is applied.
            **kwargs: ``collection_name`` may be supplied to choose the
                Qdrant collection; it defaults to the placeholder
                ``"{your collection name}"``, which must otherwise be edited
                by hand. Other keys are ignored apart from being printed.

        Returns:
            A dict with keys ``"ids"``, ``"documents"`` and ``"metadatas"``.
            Each value is a list with one inner list per search request,
            holding the point ids, the ``page_content`` payload values
            (``''`` when absent) and the ``metadata`` payload values (``{}``
            when absent) respectively.
        """
        print(f"Inputs:\nquery_texts: {query_texts}\nn_results: {n_results}\nsearch_string: {search_string}\nkwargs: {kwargs}")

        # Allow the target collection to be chosen per call instead of
        # requiring an edit to this class; the default is unchanged.
        collection_name = kwargs.get("collection_name", "{your collection name}")

        embed_response = test_embedding('text-embedding-ada-002', input=query_texts)
        all_embeddings: List[List[float]] = [
            item['embedding'] for item in embed_response['data']
        ]

        # Only constrain on page_content when there is something to match;
        # with the default empty search_string a text filter on "" would be
        # sent, which is not a meaningful "no restriction" request.
        text_filter = None
        if search_string:
            text_filter = Filter(
                must=[
                    FieldCondition(
                        key="page_content",
                        match=MatchText(
                            text=search_string,
                        )
                    )
                ]
            )

        search_queries: List[SearchRequest] = [
            SearchRequest(
                vector=embedding,
                filter=text_filter,
                limit=n_results,
                with_payload=True,
            )
            for embedding in all_embeddings
        ]

        search_response = client.search_batch(
            collection_name=collection_name,
            requests=search_queries,
        )

        ids = []
        documents = []
        metadatas = []
        for batch in search_response:
            # Treat a missing payload as empty so one payload-less point
            # cannot raise AttributeError for the whole result set.
            payloads = [scored_point.payload or {} for scored_point in batch]
            ids.append([scored_point.id for scored_point in batch])
            documents.append([payload.get('page_content', '') for payload in payloads])
            metadatas.append([payload.get('metadata', {}) for payload in payloads])

        return {
            "ids": ids,
            "documents": documents,
            "metadatas": metadatas,
        }

    def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str = "", **kwargs):
        """Query Qdrant for ``problem`` and store the result on ``self._results``.

        Args:
            problem: The single query text to search for.
            n_results: Maximum number of points requested.
            search_string: Optional text the ``page_content`` field must match.
            **kwargs: Forwarded unchanged to ``query_vector_db``.

        Returns:
            None. The result dict is stored in ``self._results``.
        """
        results = self.query_vector_db(
            query_texts=[problem],
            n_results=n_results,
            search_string=search_string,
            **kwargs,
        )

        self._results = results


# Test QdrantRetrieveUserProxyAgent
# Build an agent instance for a quick manual check of the Qdrant-backed
# retrieval path.
testproxyagent = QdrantRetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=2,
    retrieve_config={
        "task": "qa",
        "chunk_token_size": 2000,
        # NOTE(review): the "___" values look like placeholders to be filled
        # in by the reader -- confirm what the base class expects here.
        "client": "___",
        "embedding_model": "___",
    },
)

# retrieve_docs returns nothing; the query result is stored on the agent's
# `_results` attribute.
testproxyagent.retrieve_docs("What is Autogen?", n_results=10, search_string="autogen")

Metadata

Metadata

Labels

help wanted (Extra attention is needed), rag (retrieve-augmented generative agents)

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions