diff --git a/package-lock.json b/package-lock.json index ba454c6c..6b747c56 100644 --- a/package-lock.json +++ b/package-lock.json @@ -77,6 +77,22 @@ "undici-types": "~5.26.4" } }, + "node_modules/@azure-rest/core-client": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@azure-rest/core-client/-/core-client-1.4.0.tgz", + "integrity": "sha512-ozTDPBVUDR5eOnMIwhggbnVmOrka4fXCs8n8mvUo4WLLc38kki6bAOByDoVZZPz/pZy2jMt2kwfpvy/UjALj6w==", + "dependencies": { + "@azure/abort-controller": "^2.0.0", + "@azure/core-auth": "^1.3.0", + "@azure/core-rest-pipeline": "^1.5.0", + "@azure/core-tracing": "^1.0.1", + "@azure/core-util": "^1.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@azure/abort-controller": { "version": "2.1.2", "license": "MIT", @@ -154,6 +170,17 @@ "node": ">=18.0.0" } }, + "node_modules/@azure/core-sse": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@azure/core-sse/-/core-sse-2.1.2.tgz", + "integrity": "sha512-yf+pFIu8yCzXu9RbH2+8kp9vITIKJLHgkLgFNA6hxiDHK3fxeP596cHUj4c8Cm8JlooaUnYdHmF84KCZt3jbmw==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@azure/core-tracing": { "version": "1.1.2", "license": "MIT", @@ -185,6 +212,23 @@ "node": ">=18.0.0" } }, + "node_modules/@azure/openai": { + "version": "1.0.0-beta.12", + "resolved": "https://registry.npmjs.org/@azure/openai/-/openai-1.0.0-beta.12.tgz", + "integrity": "sha512-qKblxr6oVa8GsyNzY+/Ub9VmEsPYKhBrUrPaNEQiM+qrxnBPVm9kaeqGFFb/U78Q2zOabmhF9ctYt3xBW0nWnQ==", + "dependencies": { + "@azure-rest/core-client": "^1.1.7", + "@azure/core-auth": "^1.4.0", + "@azure/core-rest-pipeline": "^1.13.0", + "@azure/core-sse": "^2.0.0", + "@azure/core-util": "^1.4.0", + "@azure/logger": "^1.0.3", + "tslib": "^2.4.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@azure/search-documents": { "version": "12.0.0", "license": "MIT", @@ -18049,9 +18093,10 @@ "packages/plugins/azure-open-ai": { "version": "0.0.0", "dependencies": { + "@azure/openai": "^1.0.0-beta.6", "@ocular/types": "*", "@ocular/utils": "*", - "openai": "^4.29.2", + "dotenv": "^16.4.5", "tiktoken": "^1.0.13" }, "devDependencies": { @@ -18060,6 +18105,17 @@ "typescript": "^4.9.5" } }, + "packages/plugins/azure-open-ai/node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "packages/plugins/azure-open-ai/node_modules/typescript": { "version": "4.9.5", "dev": true, diff --git a/packages/ocular-ui/lib/stream.ts b/packages/ocular-ui/lib/stream.ts new file mode 100644 index 00000000..01010d9c --- /dev/null +++ b/packages/ocular-ui/lib/stream.ts @@ -0,0 +1,46 @@ +class NdJsonParserStream extends TransformStream { + private buffer: string = ''; + constructor() { + let controller: TransformStreamDefaultController; + super({ + start: (_controller) => { + controller = _controller; + }, + transform: (chunk) => { + const jsonChunks = chunk.split('\n').filter(Boolean); + for (const jsonChunk of jsonChunks) { + try { + this.buffer += jsonChunk; + controller.enqueue(JSON.parse(this.buffer)); + this.buffer = ''; + } catch { + // Invalid JSON, wait for next chunk + } + } + }, + }); + } +} + +export function createReader(responseBody: ReadableStream | null) { + return responseBody + 
    ?.pipeThrough(new TextDecoderStream())
+    .pipeThrough(new NdJsonParserStream())
+    .getReader();
+}
+
+export async function* readStream<T>(reader: any): AsyncGenerator<T, void> {
+  if (!reader) {
+    throw new Error('No response body or body is not readable');
+  }
+
+  let value: JSON | undefined;
+  let done: boolean;
+  while ((({ value, done } = await reader.read()), !done)) {
+    yield new Promise<T>((resolve) => {
+      setTimeout(() => {
+        resolve(value as T);
+      }, 50);
+    });
+  }
+}
diff --git a/packages/ocular-ui/pages/dashboard/search/results/index.tsx b/packages/ocular-ui/pages/dashboard/search/results/index.tsx
index 5f54c8d5..5b08627a 100644
--- a/packages/ocular-ui/pages/dashboard/search/results/index.tsx
+++ b/packages/ocular-ui/pages/dashboard/search/results/index.tsx
@@ -6,6 +6,7 @@ import Header from "@/components/search/header";
 import { useRouter } from "next/router";
 import SearchResults from "@/components/search/search-results";
 import { ApplicationContext } from "@/context/context"
+import { createReader, readStream } from '@/lib/stream';
 
 // Importing API End Points
 import api from "@/services/api"
@@ -34,19 +35,38 @@ const selectedDate = useMemo(() => {
   useEffect(() => {
     setIsLoadingResults(true);
     setIsLoadingCopilot(true);
+    // Search
     api.search.search(router.query.q, selectedResultSources, selectedDate)
       .then(data => {
-        setAiResults(data.data.chat_completion.content);
-        setai_citations(data.data.chat_completion.citations);
-        setIsLoadingCopilot(false);
         setSearchResults(data.data.hits);
         setResultSources(data.data.sources);
         setIsLoadingResults(false);
       })
       .catch(error => {
-        setIsLoadingResults(false);
-        setIsLoadingCopilot(false);
+        setIsLoadingResults(false);
       });
+    // Copilot
+    const stream = true;
+    api.search.ask(router.query.q, selectedResultSources, selectedDate, stream)
+      .then(async response => {
+        setIsLoadingCopilot(false);
+        if (stream) {
+          const reader = createReader(response.body);
+          const chunks = readStream(reader);
+          for await (const chunk of chunks) {
+            setAiResults(chunk.chat_completion.content);
+            setai_citations(chunk.chat_completion.citations);
+          }
+        } else {
+          setAiResults(response.data.chat_completion.content);
+          setai_citations(response.data.chat_completion.citations);
+          setIsLoadingCopilot(false);
+        }
+      })
+      .catch(error => {
+        console.error(error);
+        setIsLoadingCopilot(false);
+      });
   }, [router.query.q, selectedResultSources, setResultSources, selectedDate]);
 
   return (
diff --git a/packages/ocular-ui/services/api.js b/packages/ocular-ui/services/api.js
index 1b64f062..ee88a691 100644
--- a/packages/ocular-ui/services/api.js
+++ b/packages/ocular-ui/services/api.js
@@ -1,4 +1,5 @@
 import ocularRequest from './request';
+import ocularStreamingRequest from './streaming-request';
 
 export default {
   apps: {
@@ -90,12 +91,28 @@ export default {
     },
   },
   search: {
+    ask(q, sources, date, stream = false) {
+      const path = `/ask`;
+      const body = {
+        context: {
+          top: 5,
+          stream: stream
+        },
+        q: q
+      };
+      if (date.from || date.to) {
+        body.context.date = date;
+      }
+      if (sources && sources.length > 0) {
+        body.context.sources = sources;
+      }
+      return ocularStreamingRequest("POST", path, body, stream);
+    },
     search(q, sources, date) {
       const path = `/search`;
       const body = {
         context: {
           top: 20,
-          ai_completion: true
           // date: date,
         },
         q: q
diff --git a/packages/ocular-ui/services/streaming-request.ts b/packages/ocular-ui/services/streaming-request.ts
new file mode 100644
index 00000000..c04c1a52
--- /dev/null
+++ b/packages/ocular-ui/services/streaming-request.ts
@@ -0,0 +1,48 @@
+// Ocular uses Axios on all its services to make HTTP requests. However, Axios does not support client streaming in the
+// browser, so we created this streaming API as a workaround. It is a simple wrapper around the Fetch API that allows you to make streaming requests to the server.
+export const OCULAR_BACKEND_URL =
+  process.env.OCULAR_BACKEND_URL || 'http://localhost:9000/v1';
+
+export default async function ocularStreamingRequest(
+  method:
+    | 'GET'
+    | 'POST'
+    | 'PUT'
+    | 'DELETE'
+    | 'PATCH'
+    | 'OPTIONS'
+    | 'HEAD'
+    | 'CONNECT'
+    | 'TRACE',
+  path = '',
+  payload = {},
+  stream = false,
+  cancelTokenSource: AbortController | null = null
+) {
+  const headers = {
+    'Access-Control-Allow-Origin': '*',
+    'Content-Type': 'application/json',
+  };
+
+  const options = {
+    method,
+    headers,
+    credentials: 'include',
+    body: JSON.stringify(payload),
+    signal: cancelTokenSource ? cancelTokenSource.signal : undefined,
+  };
+
+  if (method === 'GET') {
+    delete options.body;
+  }
+
+  const response = await fetch(`${OCULAR_BACKEND_URL}${path}`, options);
+
+  if (!response.ok) {
+    const error = await response.text();
+    console.error('Error', error);
+    throw new Error(error);
+  }
+
+  return response;
+}
diff --git a/packages/ocular/src/api/routes/member/index.ts b/packages/ocular/src/api/routes/member/index.ts
index 0be45a06..ff22aa5f 100644
--- a/packages/ocular/src/api/routes/member/index.ts
+++ b/packages/ocular/src/api/routes/member/index.ts
@@ -1,24 +1,24 @@
-import { Router } from "express"
+import { Router } from "express";
 
-import auth from "./auth"
-import middlewares from "../../middlewares"
+import auth from "./auth";
+import middlewares from "../../middlewares";
 // import apps from "./apps"
 // import components from "./member/components"
-import search from "./search"
-import chat from "./chat"
-// import teams from "./member/teams"
+import search from "./search";
+import chat from "./chat";
+import { ask } from "./search";
 // import organisation from "./member/organisation"
 
 export default (app, container, config) => {
-  const route = Router()
-  app.use("/",route)
+  const route = Router();
+  app.use("/", route);
 
   // Unauthenticated Routes
-  auth(route)
-  
+  auth(route);
+
   // Authenticated routes
-  route.use(middlewares.authenticate())
-  route.use(middlewares.registeredLoggedinUser)
+  route.use(middlewares.authenticate());
+  route.use(middlewares.registeredLoggedinUser);
 
   // // Authenticated routes
   // route.use(middlewares.authenticate())
@@ -27,11 +27,11 @@ export default (app, container, config) => {
   // apps(route)
   // components(route)
   // invites(route)
-  chat(route)
-  search(route)
-  // teams(route)
+  chat(route);
+  search(route);
+  ask(route);
   // organisation(route)
   // users(route)
 
-  return app
-}
\ No newline at end of file
+  return app;
+};
diff --git a/packages/ocular/src/api/routes/member/search/ask.ts b/packages/ocular/src/api/routes/member/search/ask.ts
new file mode 100644
index 00000000..34ba3969
--- /dev/null
+++ b/packages/ocular/src/api/routes/member/search/ask.ts
@@ -0,0 +1,91 @@
+import {
+  ValidateNested,
+  IsOptional,
+  IsString,
+  IsBoolean,
+  IsEnum,
+  IsNumber,
+  IsDateString,
+} from "class-validator";
+import { SearchService } from "../../../../services";
+import { validator } from "@ocular/utils";
+import { AppNameDefinitions, DocType } from "@ocular/types";
+import { Type } from "class-transformer";
+
+export default async (req, res) => {
+  try {
+    console.log("Search API: Ask Request Received", req.body);
+    const { q, context } = await validator(PostAskReq, req.body);
+    const
searchApproach = req.scope.resolve("askRetrieveReadApproache"); + const loggedInUser = req.scope.resolve("loggedInUser"); + if (context && context.stream) { + const chunks = await searchApproach.runWithStreaming( + q, + (context as any) ?? {} + ); + // Stream the Search Results + for await (const chunk of chunks) { + res.write(JSON.stringify(chunk) + "\n"); + } + res.status(200); + res.end(); + } else { + const results = await searchApproach.run( + loggedInUser.organisation_id.toLowerCase().substring(4), + q, + (context as any) ?? {} + ); + return res.status(200).send(results); + } + } catch (_error: unknown) { + console.log(_error); + return res.status(500).send(`Error: Failed to execute SearchApproach.`); + } +}; + +class PostAskReq { + @IsString() + q: string; + + @IsOptional() + @ValidateNested() + @Type(() => AskContextReq) + context?: AskContextReq; +} + +class AskContextReq { + @IsOptional() + @IsBoolean() + suggest_followup_questions?: boolean; + + @IsOptional() + @IsNumber() + top?: number; + + @IsOptional() + @IsEnum(AppNameDefinitions, { each: true }) + sources?: Set; + + @IsOptional() + @IsEnum(DocType, { each: true }) + types?: Set; + + @IsOptional() + @ValidateNested() + @Type(() => DateRange) + date: DateRange; + + @IsOptional() + @IsBoolean() + stream?: boolean; +} + +class DateRange { + @IsOptional() + @IsDateString() + from: string; + + @IsOptional() + @IsDateString() + to: string; +} diff --git a/packages/ocular/src/api/routes/member/search/search.ts b/packages/ocular/src/api/routes/member/search/search.ts index a8c18957..01095111 100644 --- a/packages/ocular/src/api/routes/member/search/search.ts +++ b/packages/ocular/src/api/routes/member/search/search.ts @@ -12,26 +12,21 @@ import { validator } from "@ocular/utils"; import { AppNameDefinitions, DocType } from "@ocular/types"; import { Type } from "class-transformer"; -// TODO: The SearchApproach used currently is hardcoded to be AskRetrieveReadApproach. -// Improve to dynamically resolve the SearchApproaches based on the approach enum. export default async (req, res) => { try { - console.log("PostSearchReq", req.body); - const validated = await validator(PostSearchReq, req.body); - const { q, context } = validated; + const { q, context } = await validator(PostSearchReq, req.body); + const searchService = req.scope.resolve("searchService"); const loggedInUser = req.scope.resolve("loggedInUser"); - const searchApproach = req.scope.resolve("askRetrieveReadApproache"); - // Add Organization Id And User ID As Required Fields - console.log("context", context); - const results = await searchApproach.run( - loggedInUser.organisation_id.toLowerCase().substring(4), - q, - (context as any) ?? 
{} + const searchResults = await searchService.searchDocuments(null, q, context); + const sources = new Set( + searchResults.map( + (hit) => hit.documentMetadata?.source as AppNameDefinitions + ) ); - return res.status(200).send(results); + return res.status(200).send({ hits: searchResults, sources: [...sources] }); } catch (_error: unknown) { console.log(_error); - return res.status(500).send(`Error: Failed to execute SearchApproach.`); + return res.status(500).send(`Error: Failed to Search .`); } }; @@ -46,18 +41,6 @@ class PostSearchReq { } class SearchContextReq { - @IsOptional() - @IsBoolean() - ai_completion?: boolean; - - @IsOptional() - @IsString() - prompt_template?: string; - - @IsOptional() - @IsBoolean() - suggest_followup_questions?: boolean; - @IsOptional() @IsNumber() top?: number; diff --git a/packages/ocular/src/approaches/ask-retrieve-read.ts b/packages/ocular/src/approaches/ask-retrieve-read.ts index 486ee529..615a930f 100644 --- a/packages/ocular/src/approaches/ask-retrieve-read.ts +++ b/packages/ocular/src/approaches/ask-retrieve-read.ts @@ -60,60 +60,96 @@ export default class AskRetrieveThenRead implements ISearchApproach { if (context?.ai_completion) { context.retrieve_chunks = true; } - let searchResults: SearchResults = await this.searchService_.search( + let chunks = await this.searchService_.searchChunks( null, userQuery, context ); let message = null; - if (context && context.ai_completion) { - // Initial System Message - // const prompt = context?.prompt_template || SYSTEM_CHAT_TEMPLATE; - - const prompt = SYSTEM_CHAT_TEMPLATE; - const tokens = this.openai_.getChatModelTokenCount(prompt); - const messageBuilder = new MessageBuilder(prompt, tokens); - - // Use Top 3 Sources To Generate AI Completion. - // TODO: Use More Sophisticated Logic To Select Sources. - const sources = searchResults.chat_completion.citations - .map((c) => c.content) - .join(""); - const userContent = `${userQuery}\nSources:\n${sources}`; - - const roleTokens: number = this.openai_.getChatModelTokenCount("user"); - const messageTokens: number = - this.openai_.getChatModelTokenCount(userContent); - messageBuilder.appendMessage( - "user", - userContent, - 1, - roleTokens + messageTokens - ); - - // Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message. - // messageBuilder.appendMessage('assistant', QUESTION); - // messageBuilder.appendMessage('user', ANSWER) - const messages = messageBuilder.messages; - const chatCompletion = await this.openai_.completeChat(messages); - searchResults.chat_completion.content = chatCompletion; - } + // Initial System Message + // const prompt = context?.prompt_template || SYSTEM_CHAT_TEMPLATE; + + const prompt = SYSTEM_CHAT_TEMPLATE; + const tokens = this.openai_.getChatModelTokenCount(prompt); + const messageBuilder = new MessageBuilder(prompt, tokens); + + // Use Top 3 Sources To Generate AI Completion. + // TODO: Use More Sophisticated Logic To Select Sources. 
+ const sources = chunks.map((c) => c.content).join(""); + const userContent = `${userQuery}\nSources:\n${sources}`; - // Add Sources To The Search Results - const sources = new Set( - searchResults.hits.map( - (hit) => hit.documentMetadata?.source as AppNameDefinitions - ) + const roleTokens: number = this.openai_.getChatModelTokenCount("user"); + const messageTokens: number = + this.openai_.getChatModelTokenCount(userContent); + messageBuilder.appendMessage( + "user", + userContent, + 1, + roleTokens + messageTokens ); - searchResults.sources = [...sources]; + + // Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message. + // messageBuilder.appendMessage('assistant', QUESTION); + // messageBuilder.appendMessage('user', ANSWER) + const messages = messageBuilder.messages; + const chatCompletion = await this.openai_.completeChat(messages); + let searchResults: SearchResults = { + chat_completion: { content: chatCompletion, citations: chunks }, + }; + return searchResults; } async *runWithStreaming( - query: string, + userQuery: string, context?: SearchContext - ): AsyncGenerator { - throw new Error("Streaming not supported for this approach."); + ): AsyncGenerator { + let chunks = await this.searchService_.searchChunks( + null, + userQuery, + context + ); + + let message = null; + // Initial System Message + // const prompt = context?.prompt_template || SYSTEM_CHAT_TEMPLATE; + + const prompt = SYSTEM_CHAT_TEMPLATE; + const tokens = this.openai_.getChatModelTokenCount(prompt); + const messageBuilder = new MessageBuilder(prompt, tokens); + + // Use Top 3 Sources To Generate AI Completion. + // TODO: Use More Sophisticated Logic To Select Sources. + const sources = chunks.map((c) => c.content).join(""); + const userContent = `${userQuery}\nSources:\n${sources}`; + + const roleTokens: number = this.openai_.getChatModelTokenCount("user"); + const messageTokens: number = + this.openai_.getChatModelTokenCount(userContent); + messageBuilder.appendMessage( + "user", + userContent, + 1, + roleTokens + messageTokens + ); + + // Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message. 
+ // messageBuilder.appendMessage('assistant', QUESTION); + // messageBuilder.appendMessage('user', ANSWER) + const messages = messageBuilder.messages; + const chatCompletion = await this.openai_.completeChatWithStreaming( + messages + ); + let searchResults: SearchResults = { + chat_completion: { + content: "", + citations: chunks, + }, + }; + for await (const chunk of chatCompletion) { + searchResults.chat_completion.content = chunk; + yield searchResults; + } } } diff --git a/packages/ocular/src/approaches/chat-retrieve-read-retrieve.ts b/packages/ocular/src/approaches/chat-retrieve-read-retrieve.ts index 03e16b15..3f02a5ea 100644 --- a/packages/ocular/src/approaches/chat-retrieve-read-retrieve.ts +++ b/packages/ocular/src/approaches/chat-retrieve-read-retrieve.ts @@ -169,7 +169,8 @@ export default class ChatReadRetrieveRead implements IChatApproach { // STEP 2: Retrieve relevant documents from the search index with the GPT optimized query // ----------------------------------------------------------------------- - let hits = await this.searchService_.search(null, queryText, context); + // let hits = await this.searchService_.search(null, queryText, context); + let hits: SearchResults = { hits: [] }; // hits = hits.filter((doc) => doc !== null); // hits diff --git a/packages/ocular/src/services/search.ts b/packages/ocular/src/services/search.ts index d4558b3c..050edc20 100644 --- a/packages/ocular/src/services/search.ts +++ b/packages/ocular/src/services/search.ts @@ -9,6 +9,7 @@ import { SearchResults, SearchChunk, IEmbedderInterface, + SearchDocument, } from "@ocular/types"; import { AzureOpenAIOptions, @@ -47,11 +48,11 @@ class SearchService extends AbstractSearchService { this.embedderService_ = container.embedderService; } - async search( + async searchDocuments( indexName?: string, query?: string, context?: SearchContext - ): Promise { + ): Promise { indexName = indexName ? indexName : this.defaultIndexName_; // Compute Embeddings For The Query @@ -72,23 +73,32 @@ class SearchService extends AbstractSearchService { const docMetadata = await this.documentMetadataService_.list(docIds); // Join The Document Metadata With The Search Results - searchResults.hits = searchResults.hits.map((hit) => { + const hits = searchResults.hits.map((hit) => { const metadata = docMetadata.find((doc) => doc.id === hit.documentId); return { ...hit, documentMetadata: { ...metadata } }; }); + return hits; + } + + async searchChunks( + indexName?: string, + query?: string, + context?: SearchContext + ): Promise { + indexName = indexName ? 
indexName : this.defaultIndexName_; + + // Compute Embeddings For The Query + const queryVector = await this.embedderService_.createEmbeddings([query!]); + + // Search the index for the query vector + const chunks: SearchChunk[] = + await this.vectorDBService_.searchDocumentChunks( + indexName, + queryVector[0], + context + ); - // Retrieve Chunks Top Indipendent Chunks Irrespective of the Document - if (context.retrieve_chunks) { - searchResults.chat_completion = {}; - searchResults.chat_completion.citations = - await this.vectorDBService_.searchDocumentChunks( - indexName, - queryVector[0], - context - ); - } - console.log("Search Results", searchResults); - return searchResults; + return chunks; } } diff --git a/packages/plugins/azure-open-ai/package.json b/packages/plugins/azure-open-ai/package.json index 74f14df9..79f36402 100644 --- a/packages/plugins/azure-open-ai/package.json +++ b/packages/plugins/azure-open-ai/package.json @@ -20,7 +20,7 @@ "dependencies": { "@ocular/types": "*", "@ocular/utils": "*", - "openai": "^4.29.2", + "dotenv": "^16.4.5", "tiktoken": "^1.0.13" } } diff --git a/packages/plugins/azure-open-ai/src/__tests__/open-ai b/packages/plugins/azure-open-ai/src/__tests__/open-ai index 181ac09b..f9902d3f 100644 --- a/packages/plugins/azure-open-ai/src/__tests__/open-ai +++ b/packages/plugins/azure-open-ai/src/__tests__/open-ai @@ -1,49 +1,76 @@ -import { OpenAI } from 'openai'; -import OpenAIService from '../services/open-ai'; -describe('OpenAIService', () => { - let service; +import { OpenAI } from "openai"; +import OpenAIService from "../services/open-ai"; +describe("OpenAIService", () => { + let service; beforeEach(() => { - service = new OpenAIService({}, { - open_ai_version: "2023-05-15", - endpoint: process.env.AZURE_OPEN_AI_ENDPOINT, - embedding_deployment_name: process.env.AZURE_OPEN_AI_EMBEDDER_DEPLOYMENT_NAME, - embedding_model: process.env.AZURE_OPEN_AI_EMBEDDING_MODEL, - open_ai_key: process.env.AZURE_OPEN_AI_KEY, - chat_deployment: process.env.AZURE_OPEN_AI_CHAT_DEPLOYMENT_NAME, - chat_model: process.env.AZURE_OPEN_AI_CHAT_MODEL - }); + service = new OpenAIService( + { + rateLimiterService: { + getRequestQueue: jest.fn(), + }, + }, + { + open_ai_version: "2023-05-15", + endpoint: process.env.AZURE_OPEN_AI_ENDPOINT, + embedding_deployment_name: + process.env.AZURE_OPEN_AI_EMBEDDER_DEPLOYMENT_NAME, + embedding_model: process.env.AZURE_OPEN_AI_EMBEDDING_MODEL, + open_ai_key: process.env.AZURE_OPEN_AI_KEY, + chat_deployment: process.env.AZURE_OPEN_AI_CHAT_DEPLOYMENT_NAME, + chat_model: process.env.AZURE_OPEN_AI_CHAT_MODEL, + } + ); }); - - // it('should create embeddings', async () => { - // const doc = { content: 't' }; + // it("should create embeddings", async () => { + // const doc = { content: "t" }; // const embeddings = await service.createEmbeddings(doc); // expect(embeddings).toEqual([1, 2, 3]); // expect(OpenAI).toHaveBeenCalledWith({ - // apiKey: 'test-key', - // defaultQuery: { 'api-version': 'test-version' }, - // defaultHeaders: { 'api-key': 'test-key' }, - // baseURL: 'test-endpoint/openai/deployments/test-deployment' + // apiKey: "test-key", + // defaultQuery: { "api-version": "test-version" }, + // defaultHeaders: { "api-key": "test-key" }, + // baseURL: "test-endpoint/openai/deployments/test-deployment", // }); // expect(service.openai_.embeddings.create).toHaveBeenCalledWith({ - // input: 'test content', - // model: 'test-model' + // input: "test content", + // model: "test-model", // }); // }); - // it('should return empty array if doc 
content is not provided', async () => { + // it("should return empty array if doc content is not provided", async () => { // const doc = {}; // const embeddings = await service.createEmbeddings(doc); // expect(embeddings).toEqual([]); // }); - it('should complete chat', async () => { + it("should complete chat", async () => { const messages = [ - {role: 'system', content: 'You are a helpful assistant.'}, - {role: 'user', content: 'Translate the following English text to French: "Hello, how are you?"'} + { role: "system", content: "You are a helpful assistant." }, + { + role: "user", + content: + 'Translate the following English text to French: "Hello, how are you?"', + }, ]; const result = await service.completeChat(messages); - expect(result).toEqual( "\"Bonjour, comment vas-tu ?\""); + expect(result).toEqual('"Bonjour, comment vas-tu ?"'); }); -}); \ No newline at end of file + // it("should complete chat with stream", async () => { + // const messages = [ + // { role: "system", content: "You are a helpful assistant." }, + // { + // role: "user", + // content: + // 'Translate the following English text to French: "Hello, how are you?"', + // }, + // ]; + // const stream = service.completeChatWithStream(messages); + // stream.on("data", (data) => { + // console.log(data); + // expect("result").toEqual('"Bonjour, comment vas-tu ?"'); + // }); + // expect("result").toEqual('"Bonjour, comment vas-tu ?"'); + // }); +}); diff --git a/packages/plugins/azure-open-ai/src/services/open-ai.ts b/packages/plugins/azure-open-ai/src/services/open-ai.ts index cd8c16ea..c3f94474 100644 --- a/packages/plugins/azure-open-ai/src/services/open-ai.ts +++ b/packages/plugins/azure-open-ai/src/services/open-ai.ts @@ -112,6 +112,28 @@ export default class AzureOpenAIService extends AbstractLLMService { } } + async *completeChatWithStreaming( + messages: Message[] + ): AsyncGenerator { + try { + const result = await this.chatClient_.chat.completions.create({ + model: this.chatModel_, + stream: true, + messages, + temperature: 0.3, + max_tokens: 1024, + n: 1, + }); + let content = ""; + for await (const chunk of result) { + content += chunk.choices[0]?.delta.content ?? 
""; + yield content; + } + } catch (error) { + console.log("Azure Open AI: Error", error); + } + } + getChatModelTokenCount(content: string): number { const encoder = encoding_for_model(this.chatModel_ as TiktokenModel); let tokens = 2; diff --git a/packages/plugins/open-ai/src/__tests__/open-ai b/packages/plugins/open-ai/src/__tests__/open-ai deleted file mode 100644 index e58a2ea0..00000000 --- a/packages/plugins/open-ai/src/__tests__/open-ai +++ /dev/null @@ -1,52 +0,0 @@ -import { OpenAI } from 'openai'; -import OpenAIService from '../services/open-ai'; - -describe('OpenAIService', () => { - let service; - - beforeEach(() => { - service = new OpenAIService({ - rateLimiterService: { - getRequestQueue: jest.fn().mockReturnValue({ - removeTokens: jest.fn() - }) - } - }, { - embedding_model: process.env.OPEN_AI_EMBEDDING_MODEL, - open_ai_key: process.env.OPEN_AI_KEY, - chat_model: process.env.OPEN_AI_CHAT_MODEL - }); - }); - - it('should create embeddings', async () => { - const doc = { content: 't' }; - const embeddings = await service.createEmbeddings("test content"); - expect(embeddings).toEqual([1, 2, 3]); - expect(OpenAI).toHaveBeenCalledWith({ - apiKey: 'test-key', - defaultQuery: { 'api-version': 'test-version' }, - defaultHeaders: { 'api-key': 'test-key' }, - baseURL: 'test-endpoint/openai/deployments/test-deployment' - }); - expect(service.openai_.embeddings.create).toHaveBeenCalledWith({ - input: 'test content', - model: 'test-model' - }); - }); - - it('should return empty array if doc content is not provided', async () => { - const doc = {}; - const embeddings = await service.createEmbeddings("test"); - expect(embeddings).toEqual([]); - }); - - // it('should complete chat', async () => { - // const messages = [ - // {role: 'system', content: 'You are a helpful assistant.'}, - // {role: 'user', content: 'Translate the following English text to French: "Hello, how are you?"'} - // ]; - // const result = await service.completeChat(messages); - // expect(result).toEqual( "\"Bonjour, comment vas-tu ?\""); - // }); - -}); \ No newline at end of file diff --git a/packages/plugins/open-ai/src/__tests__/open-ai.ts b/packages/plugins/open-ai/src/__tests__/open-ai.ts new file mode 100644 index 00000000..94e09973 --- /dev/null +++ b/packages/plugins/open-ai/src/__tests__/open-ai.ts @@ -0,0 +1,64 @@ +import { OpenAI } from "openai"; +import OpenAIService from "../services/open-ai"; +import dotenv from "dotenv"; +dotenv.config({ path: "../../ocular/.env.dev" }); + +describe("OpenAIService", () => { + let service; + + beforeEach(() => { + service = new OpenAIService( + { + rateLimiterService: { + getRequestQueue: jest.fn().mockReturnValue({ + removeTokens: jest.fn(), + }), + }, + }, + { + embedding_model: process.env.OPEN_AI_EMBEDDING_MODEL, + open_ai_key: process.env.OPEN_AI_KEY, + chat_model: process.env.OPEN_AI_CHAT_MODEL, + } + ); + }); + + // it("should create embeddings", async () => { + // const doc = { content: "t" }; + // const embeddings = await service.createEmbeddings("test content"); + // expect(embeddings).toEqual([1, 2, 3]); + // expect(OpenAI).toHaveBeenCalledWith({ + // apiKey: "test-key", + // defaultQuery: { "api-version": "test-version" }, + // defaultHeaders: { "api-key": "test-key" }, + // baseURL: "test-endpoint/openai/deployments/test-deployment", + // }); + // expect(service.openai_.embeddings.create).toHaveBeenCalledWith({ + // input: "test content", + // model: "test-model", + // }); + // }); + + // it("should return empty array if doc content is not provided", async 
() => { + // const doc = {}; + // const embeddings = await service.createEmbeddings("test"); + // expect(embeddings).toEqual([]); + // }); + + it("should complete chat with stream", async () => { + const messages = [ + { role: "system", content: "You are a helpful assistant." }, + { + role: "user", + content: + 'Translate the following English text to French: "Hello, how are you?"', + }, + ]; + const stream = service.completeChatWithStreaming(messages); + let result = ""; + for await (const chunk of stream) { + result += chunk; + } + expect(result).toEqual('"Bonjour, comment vas-tu ?"'); + }); +}); diff --git a/packages/plugins/open-ai/src/services/open-ai.ts b/packages/plugins/open-ai/src/services/open-ai.ts index e767058e..9be8856b 100644 --- a/packages/plugins/open-ai/src/services/open-ai.ts +++ b/packages/plugins/open-ai/src/services/open-ai.ts @@ -1,71 +1,103 @@ -import { IndexableDocument, AbstractLLMService, IndexableDocChunk, Message, PluginNameDefinitions } from "@ocular/types"; -import { OpenAI } from 'openai'; -import { encoding_for_model, type TiktokenModel } from 'tiktoken'; +import { + IndexableDocument, + AbstractLLMService, + IndexableDocChunk, + Message, + PluginNameDefinitions, +} from "@ocular/types"; +import { OpenAI } from "openai"; +import { encoding_for_model, type TiktokenModel } from "tiktoken"; import { RateLimiterService } from "@ocular/ocular"; -import { RateLimiterQueue } from "rate-limiter-flexible" +import { RateLimiterQueue } from "rate-limiter-flexible"; export default class OpenAIService extends AbstractLLMService { + static identifier = PluginNameDefinitions.OPENAI; - static identifier = PluginNameDefinitions.OPENAI - - protected openAIKey_: string - protected openAI_: OpenAI - protected embeddingModel_: string - protected chatModel_: string - protected tokenLimit_:number = 4096 + protected openAIKey_: string; + protected openAI_: OpenAI; + protected embeddingModel_: string; + protected chatModel_: string; + protected tokenLimit_: number = 4096; protected rateLimiterService_: RateLimiterService; - protected requestQueue_: RateLimiterQueue + protected requestQueue_: RateLimiterQueue; constructor(container, options) { - super(arguments[0],options) + super(arguments[0], options); - // Rate Limiter Service + // Rate Limiter Service this.rateLimiterService_ = container.rateLimiterService; - this.requestQueue_ = this.rateLimiterService_.getRequestQueue(PluginNameDefinitions.OPENAI); + this.requestQueue_ = this.rateLimiterService_.getRequestQueue( + PluginNameDefinitions.OPENAI + ); // Models - this.embeddingModel_ = options.embedding_model - this.chatModel_ = options.chat_model - + this.embeddingModel_ = options.embedding_model; + this.chatModel_ = options.chat_model; + // Chat Deployment - this.openAIKey_ = options.open_ai_key + this.openAIKey_ = options.open_ai_key; this.openAI_ = new OpenAI({ - apiKey: this.openAIKey_ || "" - }) + apiKey: this.openAIKey_ || "", + }); } - async createEmbeddings(text:string): Promise { - try{ + async createEmbeddings(text: string): Promise { + try { // Rate Limiter Limits On Token Count - const tokenCount = this.getChatModelTokenCount(text) - await this.requestQueue_.removeTokens(tokenCount,PluginNameDefinitions.OPENAI) + const tokenCount = this.getChatModelTokenCount(text); + await this.requestQueue_.removeTokens( + tokenCount, + PluginNameDefinitions.OPENAI + ); const result = await this.openAI_.embeddings.create({ model: this.embeddingModel_, - input: text - }) + input: text, + }); return result.data[0].embedding; - } 
catch(error){ - console.log("Open AI: Error",error) + } catch (error) { + console.log("Open AI: Error", error); } } async completeChat(messages: Message[]): Promise { - try{ + try { const result = await this.openAI_.chat.completions.create({ model: this.chatModel_, messages, - temperature: 0.3, + temperature: 0.3, max_tokens: 1024, n: 1, }); - console.log("Result Open AI",result.choices[0].message.content) + console.log("Result Open AI", result.choices[0].message.content); return result.choices[0].message.content; - }catch(error){ - console.log("Open AI: Error",error) + } catch (error) { + console.log("Open AI: Error", error); + } + } + + async *completeChatWithStreaming( + messages: Message[] + ): AsyncGenerator { + try { + const result = await this.openAI_.chat.completions.create({ + model: this.chatModel_, + stream: true, + messages, + temperature: 0.3, + max_tokens: 1024, + n: 1, + }); + let content = ""; + for await (const chunk of result) { + content += chunk.choices[0]?.delta.content ?? ""; + yield content; + } + } catch (error) { + console.log("Azure Open AI: Error", error); } } - getChatModelTokenCount(content : string): number { + getChatModelTokenCount(content: string): number { const encoder = encoding_for_model(this.chatModel_ as TiktokenModel); let tokens = 2; for (const value of Object.values(content)) { @@ -78,4 +110,4 @@ export default class OpenAIService extends AbstractLLMService { getTokenLimit(): number { return this.tokenLimit_; } -} \ No newline at end of file +} diff --git a/packages/types/src/common/search.ts b/packages/types/src/common/search.ts index 17375a6d..3a8a4ad2 100644 --- a/packages/types/src/common/search.ts +++ b/packages/types/src/common/search.ts @@ -46,7 +46,7 @@ export interface SearchResults { content?: string; citations?: SearchChunk[]; }; - hits: SearchDocument[]; + hits?: SearchDocument[]; sources?: AppNameDefinitions[]; } diff --git a/packages/types/src/interfaces/approach-interface.ts b/packages/types/src/interfaces/approach-interface.ts index 1805bd45..7926852e 100644 --- a/packages/types/src/interfaces/approach-interface.ts +++ b/packages/types/src/interfaces/approach-interface.ts @@ -23,7 +23,7 @@ export interface ISearchApproach { runWithStreaming( query: string, context?: SearchContext - ): AsyncGenerator; + ): AsyncGenerator; } export interface IChatApproach { diff --git a/packages/types/src/interfaces/llm-interface.ts b/packages/types/src/interfaces/llm-interface.ts index 35949b9b..660a91cc 100644 --- a/packages/types/src/interfaces/llm-interface.ts +++ b/packages/types/src/interfaces/llm-interface.ts @@ -1,12 +1,13 @@ -import { AutoflowContainer, Message } from '../common'; -import { IndexableDocument,IndexableDocChunk } from '../common/document'; -import { TransactionBaseService } from './transaction-base-service'; +import { AutoflowContainer, Message } from "../common"; +import { IndexableDocument, IndexableDocChunk } from "../common/document"; +import { TransactionBaseService } from "./transaction-base-service"; export interface ILLMInterface extends TransactionBaseService { - createEmbeddings(text:string): Promise ; + createEmbeddings(text: string): Promise; completeChat(messages: Message[]): Promise; + completeChatWithStreaming(messages: Message[]): AsyncGenerator; getChatModelTokenCount(content: string): number; - getTokenLimit(): number + getTokenLimit(): number; } /** @@ -16,25 +17,28 @@ export abstract class AbstractLLMService extends TransactionBaseService implements ILLMInterface { - static _isLLM = true - static identifier: 
string
+  static _isLLM = true;
+  static identifier: string;
 
   static isLLMService(object): boolean {
-    return object?.constructor?._isLLM
+    return object?.constructor?._isLLM;
   }
   getIdentifier(): string {
-    return (this.constructor as any).identifier
+    return (this.constructor as any).identifier;
   }
 
   protected constructor(
     protected readonly container: AutoflowContainer,
     protected readonly config?: Record<string, unknown> // eslint-disable-next-line @typescript-eslint/no-empty-function
   ) {
-    super(container, config)
+    super(container, config);
   }
 
-  abstract createEmbeddings(text:string): Promise<number[]> ;
+  abstract createEmbeddings(text: string): Promise<number[]>;
   abstract completeChat(messages: Message[]): Promise<string>;
+  abstract completeChatWithStreaming(
+    messages: Message[]
+  ): AsyncGenerator<string>;
   abstract getChatModelTokenCount(content: string): number;
-  abstract getTokenLimit(): number
-}
\ No newline at end of file
+  abstract getTokenLimit(): number;
+}
diff --git a/packages/types/src/interfaces/search-interface.ts b/packages/types/src/interfaces/search-interface.ts
index ecfe3224..1765ae54 100644
--- a/packages/types/src/interfaces/search-interface.ts
+++ b/packages/types/src/interfaces/search-interface.ts
@@ -1,15 +1,22 @@
 import {
   IndexableDocChunk,
   Message,
+  SearchChunk,
   SearchContext,
+  SearchDocument,
   SearchResults,
 } from "../common";
 
 export interface ISearchService {
-  search(
+  searchDocuments(
     indexName: string,
     query: string,
     context?: SearchContext
-  ): Promise<SearchResults>;
+  ): Promise<SearchDocument[]>;
+  searchChunks(
+    indexName: string,
+    query: string,
+    context?: SearchContext
+  ): Promise<SearchChunk[]>;
 }
 
 export abstract class AbstractSearchService implements ISearchService {
@@ -29,9 +36,14 @@ export abstract class AbstractSearchService implements ISearchService {
   protected constructor(container, options) {
     this.options_ = options;
   }
-  abstract search(
+  abstract searchDocuments(
+    indexName: string,
+    query: string,
+    context?: SearchContext
+  ): Promise<SearchDocument[]>;
+  abstract searchChunks(
     indexName: string,
     query: string,
     context?: SearchContext
-  ): Promise<SearchResults>;
+  ): Promise<SearchChunk[]>;
 }
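
Reviewer note, outside the patch itself: a minimal sketch of how the NDJSON streaming contract introduced above is meant to be consumed end to end. It assumes the helpers added in this diff (ocularStreamingRequest, createReader, readStream) and the SearchResults chunk shape written by the /ask route; the import aliases and the askWithStreaming wrapper are illustrative only, not part of the change set.

import ocularStreamingRequest from '@/services/streaming-request';
import { createReader, readStream } from '@/lib/stream';

// Shape of each NDJSON line written by the /ask route in this diff.
interface AskChunk {
  chat_completion: { content?: string; citations?: unknown[] };
}

export async function askWithStreaming(q: string): Promise<string> {
  // POST /ask with streaming enabled; the server writes one JSON object per line.
  const response = await ocularStreamingRequest(
    'POST',
    '/ask',
    { q, context: { top: 5, stream: true } },
    true,
    null
  );

  // Each chunk carries the accumulated completion so far, so the last chunk wins.
  let content = '';
  const reader = createReader(response.body);
  for await (const chunk of readStream<AskChunk>(reader)) {
    content = chunk.chat_completion.content ?? '';
  }
  return content;
}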