diff --git a/apps/slackbot/ai_fcts.py b/apps/slackbot/ai_fcts.py
index 7226f02..28d7c25 100644
--- a/apps/slackbot/ai_fcts.py
+++ b/apps/slackbot/ai_fcts.py
@@ -31,7 +31,7 @@
     client=QdrantClient(url=qdrant_uri, port=6333),
     collection_name=qdrant_collection_name,
     content_payload_key="content",
-    metadata_payload_key=None,
+    metadata_payload_key="metadata",
     distance_strategy="Cosine",
     embeddings=InfinityEmbeddings(model=infinity_model, infinity_api_url=infinity_api_url)
 )
@@ -80,7 +80,7 @@
 def format_docs(docs):
     context = ""
     for index, doc in enumerate(docs):
-        context += f"Document Rank {index + 1}: {doc.page_content}\n\n"
+        context += f"Document Rank {index + 1}. Source: https://www.notion.so/{doc.metadata['page_id']}. Content: {doc.page_content}\n\n"
     return context

 # create function to invoke the retrievalQA
diff --git a/apps/vectordb_sync/vectordb_sync_fcts.py b/apps/vectordb_sync/vectordb_sync_fcts.py
index 3997ffb..a81c147 100644
--- a/apps/vectordb_sync/vectordb_sync_fcts.py
+++ b/apps/vectordb_sync/vectordb_sync_fcts.py
@@ -113,7 +113,7 @@ def notion_to_qdrant(id_to_process) -> None:

     points_to_update = [PointStruct(id=str(uuid.uuid4()),
                                     vector=chunk_embedding,
-                                    payload={"content": chunk, "page_id": id_to_process, "type": "notion"}) for chunk_embedding, chunk in zip(chunks_embedded, chunks)]
+                                    payload={"content": chunk, "metadata":{"page_id": id_to_process, "type": "notion"}}) for chunk_embedding, chunk in zip(chunks_embedded, chunks)]

@@ -179,7 +179,7 @@ def web_to_qdrant(id_to_process):
     # Insert the preprocessed chunk into Qdrant
     points_to_update = [PointStruct(id=str(uuid.uuid4()),
                                     vector=chunk_embedding,
-                                    payload={"content": chunk, "page_id": id_to_process, "type": "web"}) for
+                                    payload={"content": chunk, "metadata":{"page_id": id_to_process, "type": "web"}}) for
                                     chunk_embedding, chunk in zip(chunks_embedded, chunks)]

     qdrant_client.upsert(
@@ -258,7 +258,7 @@ def slack_to_qdrant(id_to_process):

     points_to_update = [PointStruct(id=str(uuid.uuid4()),
                                     vector=chunk_embedding,
-                                    payload={"content": chunk, "page_id": id_to_process, "type": "slack"}) for chunk_embedding, chunk in zip(chunks_embedded, chunks)]
+                                    payload={"content": chunk, "metadata":{"page_id": id_to_process, "type": "slack"}}) for chunk_embedding, chunk in zip(chunks_embedded, chunks)]
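
Not part of the diff: a minimal sketch of what the reshaped payload buys us, assuming the LangChain Qdrant wrapper shown in ai_fcts.py. With content_payload_key="content" and metadata_payload_key="metadata", the nested metadata object is read back onto each retrieved Document, which is what the updated format_docs relies on for doc.metadata['page_id']. The payload values and the langchain_core import path below are illustrative assumptions, not code from this repo.

from langchain_core.documents import Document

# Payload shape now written by notion_to_qdrant / web_to_qdrant / slack_to_qdrant
# (placeholder values, for illustration only).
payload = {
    "content": "chunk text ...",
    "metadata": {"page_id": "0123456789abcdef", "type": "notion"},
}

# A retrieved hit maps onto roughly this Document once
# metadata_payload_key="metadata" is configured:
doc = Document(page_content=payload["content"], metadata=payload["metadata"])

# ...which is what the updated format_docs() expects:
print(f"Source: https://www.notion.so/{doc.metadata['page_id']}")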