Skip to content
This repository has been archived by the owner on Sep 9, 2024. It is now read-only.

Commit

Permalink
Pinecone Plugin : Added SearchDocuments , SearchDocumentChunks servi…
Browse files Browse the repository at this point in the history
…ce (#159)

* Pinecone Plugin : Added SearchDocuments , SearchDocumentChunks service

* Pinecone Plugin : Refactored group_by filter in Pinecone Search Results

* Pinecone Plugin :  Added date Filter in Pinecone
  • Loading branch information
Vivek-Lahole authored Jul 1, 2024
1 parent 1384668 commit f484cf5
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 14 deletions.
177 changes: 167 additions & 10 deletions packages/plugins/pinecone-vector-search-service/src/services/pinecone.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
SearchContext,
DocType,
SearchDocument,
SearchSnippet,
SearchResults,
SearchChunk,
AppNameDefinitions,
Expand Down Expand Up @@ -49,7 +50,7 @@ export default class PineconeService extends AbstractVectorDBService {
existingIndexes.indexes &&
existingIndexes.indexes.some((index) => index.name === indexName)
) {
console.log(`Index ${indexName} already exists`);
this.logger_.error(`Index with name ${indexName} exists`);
return;
}
await this.pineconeClient_.createIndex({
Expand Down Expand Up @@ -89,11 +90,11 @@ export default class PineconeService extends AbstractVectorDBService {
await index.namespace(PineconeNamespaceType.TITLE).deleteMany(docIds);

this.logger_.info(
`deleteDocuments: Done Deleting Docs From Quadrant ${docIds.length}`
`deleteDocuments: Done Deleting Docs From Pinecone ${docIds.length}`
);
} catch (error) {
this.logger_.error(
`deleteDocuments: Error Deleting Docs From Quadrant ${error.message}`
`deleteDocuments: Error Deleting Docs From Pinecone ${error.message}`
);
}
}
Expand Down Expand Up @@ -142,6 +143,10 @@ export default class PineconeService extends AbstractVectorDBService {
return uuidv5(name, this.UUIDHASH);
}

/**
 * Converts a `Date` into whole seconds elapsed since the Unix epoch.
 *
 * The millisecond value reported by `Date#getTime` is divided by 1000 and
 * rounded down, so sub-second precision is discarded.
 *
 * @param date - The date to convert.
 * @returns The number of whole seconds since 1970-01-01T00:00:00Z
 *          (`NaN` when `date` is an invalid date).
 */
dateToUnixTimestamp(date: Date): number {
  const millisecondsPerSecond = 1000;
  const millisecondsSinceEpoch = date.getTime();
  const secondsSinceEpoch = millisecondsSinceEpoch / millisecondsPerSecond;
  return Math.floor(secondsSinceEpoch);
}

private fomratIndexableDocToPineconeRecords = (
doc: IndexableDocChunk,
namespace: string
Expand All @@ -161,25 +166,177 @@ export default class PineconeService extends AbstractVectorDBService {
type: doc.type,
content: doc.content,
chunkLinks: doc.chunkLinks,
metadata: doc.metadata,
updatedAt: doc.updatedAt,
updatedAt: this.dateToUnixTimestamp(doc.updatedAt),
},
values: vectors,
};
};

searchDocuments(
org_id: string,
/**
 * Builds the Pinecone metadata filter for a search request.
 *
 * Each criterion present on `context` contributes one condition, and all
 * conditions are combined with `$and`:
 *  - `sources`          -> `source` is one of the given sources (`$in`)
 *  - `organisation_id`  -> `organisationId` equals the given id (`$eq`)
 *  - `date.from`        -> `updatedAt` >= the date as Unix seconds (`$gte`)
 *  - `date.to`          -> `updatedAt` <= the date as Unix seconds (`$lte`)
 *  - `types`            -> `type` is one of the given types (`$in`)
 *
 * @param context - Optional search context carrying the filter criteria.
 * @returns `{ $and: [...] }` when at least one criterion applies, otherwise
 *          `undefined` so that callers can omit the filter entirely instead
 *          of sending an `$and` with no conditions.
 */
private pineconeMetadataFilter(
  context?: SearchContext
): { $and: Record<string, unknown>[] } | undefined {
  // The search entry points declare `context` as optional, so tolerate its absence.
  if (!context) {
    return undefined;
  }

  const conditions: Record<string, unknown>[] = [];

  // Filter by source
  if (context.sources && context.sources.length > 0) {
    conditions.push({
      source: { $in: [...context.sources] },
    });
  }

  // Filter by organisation_id
  if (context.organisation_id) {
    conditions.push({
      organisationId: { $eq: context.organisation_id },
    });
  }

  // Filter by date. `updatedAt` is compared as a Unix timestamp in seconds.
  if (context.date?.from) {
    conditions.push({
      updatedAt: {
        $gte: this.dateToUnixTimestamp(new Date(context.date.from)),
      },
    });
  }

  if (context.date?.to) {
    conditions.push({
      updatedAt: {
        $lte: this.dateToUnixTimestamp(new Date(context.date.to)),
      },
    });
  }

  // Filter by types
  if (context.types && context.types.length > 0) {
    conditions.push({
      type: { $in: [...context.types] },
    });
  }

  // An `$and` without any condition filters nothing; report "no filter" instead.
  if (conditions.length === 0) {
    return undefined;
  }

  return { $and: conditions };
}

/**
 * Returns the documents that match the search vector. This is useful for the
 * search service, which cares about document-level search results.
 *
 * The query runs against the CONTENT namespace of the given index, and the
 * matches are grouped per document by `formatPineconeSearchResults`.
 *
 * @param indexName - Name of the Pinecone index to query.
 * @param vector - Query embedding.
 * @param context - Optional search context; `top` sets the number of matches
 *                  requested (default 10) and the remaining fields are turned
 *                  into a metadata filter.
 * @returns The grouped search results. When the search fails, the error is
 *          logged and an empty result (`{ hits: [] }`) is returned.
 */
async searchDocuments(
  indexName: string,
  vector: number[],
  context?: SearchContext
): Promise<SearchResults> {
  try {
    // Construct Search Filters (only when there is a context to derive them from)
    const filter = context ? this.pineconeMetadataFilter(context) : undefined;

    // Build a Search Query; the filter key is left out when there is no filter
    const query = {
      vector,
      topK: context?.top ? Number(context.top) : 10,
      includeMetadata: true,
      ...(filter ? { filter } : {}),
    };

    const pineconeSearchResults = await this.pineconeClient_
      .index(indexName)
      .namespace(PineconeNamespaceType.CONTENT)
      .query(query);

    return this.formatPineconeSearchResults(pineconeSearchResults);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    this.logger_.error(
      `searchDocuments: Error Searching Docs From Pinecone ${reason}`
    );
    // Best effort: keep the declared return type instead of resolving to undefined.
    return { hits: [] };
  }
}
searchDocumentChunks(

/**
 * Groups raw Pinecone matches into document-level search results.
 *
 * Every match becomes a snippet (score, content, updatedAt) filed under the
 * `documentId` found in its metadata. Documents appear in the order in which
 * their first match was encountered, and snippets keep the match order.
 *
 * NOTE(review): `score` and `updatedAt` are copied from the match as-is; the
 * indexing code stores `updatedAt` as a Unix timestamp, so these values may
 * not line up with the `SearchSnippet` field types - confirm.
 *
 * @param pineconeResults - Query response whose `matches` are grouped.
 * @returns The search results with one hit per distinct document id.
 */
private formatPineconeSearchResults = (pineconeResults): SearchResults => {
  const snippetsByDocument = new Map<string, SearchSnippet[]>();

  for (const match of pineconeResults.matches ?? []) {
    const { documentId, content, updatedAt } = match.metadata;
    const snippet: SearchSnippet = { score: match.score, content, updatedAt };

    const collected = snippetsByDocument.get(documentId);
    if (collected) {
      collected.push(snippet);
    } else {
      snippetsByDocument.set(documentId, [snippet]);
    }
  }

  const hits: SearchDocument[] = Array.from(
    snippetsByDocument,
    ([documentId, snippets]) => ({ documentId, snippets })
  );

  return { hits };
};

/**
 * Returns the individual chunks that best match the search vector.
 *
 * The query runs against the CONTENT namespace of the given index and always
 * requests 3 matches; `context.top` is not consulted and the caller's
 * `context` object is left unmodified.
 *
 * @param indexName - Name of the Pinecone index to query.
 * @param vector - Query embedding.
 * @param context - Optional search context used to build the metadata filter.
 * @returns The matching chunks, or an empty array when Pinecone reports no
 *          matches or the search fails (the error is logged).
 */
async searchDocumentChunks(
  indexName: string,
  vector: number[],
  context?: SearchContext
): Promise<SearchChunk[]> {
  try {
    this.logger_.info("Search Chunks Called");

    // Number of chunks requested from Pinecone for a chunk search.
    const topK = 3;

    // Construct Search Filters (only when there is a context to derive them from)
    const filter = context ? this.pineconeMetadataFilter(context) : undefined;

    // Build a Search Query; the filter key is left out when there is no filter
    const query = {
      vector,
      topK,
      includeMetadata: true,
      ...(filter ? { filter } : {}),
    };

    const index = this.pineconeClient_.index(indexName);
    const pineconeSearchResults = await index
      .namespace(PineconeNamespaceType.CONTENT)
      .query(query);

    // `matches` may be absent; fall back to an empty list rather than undefined.
    const chunks: SearchChunk[] = (pineconeSearchResults.matches ?? []).map(
      (result) => ({
        score: Number(result.score),
        content: String(result.metadata.content),
        documentId: String(result.metadata.documentId),
        organisationId: String(result.metadata.organisationId),
        chunkId: Number(result.metadata.chunkId),
        type: result.metadata.type as DocType,
        source: String(result.metadata.source) as AppNameDefinitions,
        title: String(result.metadata.title),
        updatedAt: String(result.metadata.updatedAt),
      })
    );

    // Log the count: interpolating the array itself prints "[object Object]".
    this.logger_.info(`Search Chunks ${chunks.length}`);

    return chunks;
  } catch (error) {
    this.logger_.error(
      `searchDocumentChunks: Error Searching Docs From Pinecone ${error}`
    );
    // Best effort: keep the declared return type instead of resolving to undefined.
    return [];
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,19 +152,19 @@ describe("qdrantService", () => {
mockVector,
{ top: 1 }
);
expect(result1.hits.length).toEqual(1);
expect(result1.hits?.length).toEqual(1);
const result2 = await service.searchDocuments(
"OcularTestIndex",
mockVector,
{ top: 2 }
);
expect(result2.hits.length).toEqual(2);
expect(result2.hits?.length).toEqual(2);
const result3 = await service.searchDocuments(
"OcularTestIndex",
mockVector,
{ top: 3 }
);
expect(result3.hits.length).toEqual(3);
expect(result3.hits?.length).toEqual(3);
});

it("Filter By One Source", async () => {
Expand Down Expand Up @@ -278,7 +278,7 @@ describe("qdrantService", () => {
);

// Sort the results by content for comparison
expect(result.hits.length).toEqual(3);
expect(result.hits?.length).toEqual(3);

await service.deleteIndex("OcularTestIndex");
});
Expand Down
1 change: 1 addition & 0 deletions packages/types/src/common/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export interface SearchDocChunk {
}

/**
 * A single matching excerpt of a document inside a search result.
 *
 * NOTE(review): the Pinecone service fills `score` straight from the match
 * score and `updatedAt` straight from the stored metadata (indexed as a Unix
 * timestamp), which look numeric at runtime - confirm that the declared
 * `string` / `Date` types below are what consumers actually receive.
 */
export interface SearchSnippet {
/** Score of the match that produced this snippet. */
score: string;
/** Text content of the matched chunk. */
content: string;
/** Last-update time recorded for the matched chunk. */
updatedAt: Date;
}
Expand Down

0 comments on commit f484cf5

Please sign in to comment.