-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
google[minor]: Move Vertex embeddings to integration package #12
base: cloned_main_de3a4
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -212,7 +212,9 @@ export class GoogleVertexAILLMConnection< | |||||||
} | ||||||||
|
||||||||
const projectId = await this.client.getProjectId(); | ||||||||
|
||||||||
console.log( | ||||||||
`https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:${method}` | ||||||||
); | ||||||||
Comment on lines
+215
to
+217
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I noticed that a `console.log` statement has been added to print the URL method. While this might be helpful for debugging, it's generally not recommended to leave `console.log` statements in production code. Additionally, this log might expose sensitive information about the project ID, location, and model. Consider removing this `console.log` statement or replacing it with a more appropriate logging mechanism that respects different environments (development vs. production) and doesn't expose sensitive data.
Comment on lines
+215
to
+217
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Remove or replace the `console.log` statement:
```diff
- console.log(
-   `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:${method}`
- );
```
Committable suggestion
Suggested change
|
||||||||
return `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:${method}`; | ||||||||
} | ||||||||
|
||||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,202 @@ | ||||||||||||||||||
import { Embeddings, EmbeddingsParams } from "@langchain/core/embeddings"; | ||||||||||||||||||
import { | ||||||||||||||||||
AsyncCaller, | ||||||||||||||||||
AsyncCallerCallOptions, | ||||||||||||||||||
} from "@langchain/core/utils/async_caller"; | ||||||||||||||||||
import { chunkArray } from "@langchain/core/utils/chunk_array"; | ||||||||||||||||||
import { GoogleAIConnection } from "./connection.js"; | ||||||||||||||||||
import { ApiKeyGoogleAuth, GoogleAbstractedClient } from "./auth.js"; | ||||||||||||||||||
import { GoogleAIModelRequestParams, GoogleConnectionParams } from "./types.js"; | ||||||||||||||||||
import { getEnvironmentVariable } from "@langchain/core/utils/env"; | ||||||||||||||||||
|
||||||||||||||||||
class EmbeddingsConnection< | ||||||||||||||||||
CallOptions extends AsyncCallerCallOptions, | ||||||||||||||||||
AuthOptions | ||||||||||||||||||
> extends GoogleAIConnection< | ||||||||||||||||||
CallOptions, | ||||||||||||||||||
GoogleEmbeddingsInstance[], | ||||||||||||||||||
AuthOptions | ||||||||||||||||||
> { | ||||||||||||||||||
convertSystemMessageToHumanContent: boolean | undefined; | ||||||||||||||||||
|
||||||||||||||||||
constructor( | ||||||||||||||||||
fields: GoogleConnectionParams<AuthOptions> | undefined, | ||||||||||||||||||
caller: AsyncCaller, | ||||||||||||||||||
client: GoogleAbstractedClient, | ||||||||||||||||||
streaming: boolean | ||||||||||||||||||
) { | ||||||||||||||||||
super(fields, caller, client, streaming); | ||||||||||||||||||
} | ||||||||||||||||||
Comment on lines
+22
to
+29
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Remove unnecessary constructor. The constructor in `EmbeddingsConnection` only forwards its arguments to `super` and can be removed:
```diff
- constructor(
-   fields: GoogleConnectionParams<AuthOptions> | undefined,
-   caller: AsyncCaller,
-   client: GoogleAbstractedClient,
-   streaming: boolean
- ) {
-   super(fields, caller, client, streaming);
- }
```
Committable suggestion
Suggested change
ToolsBiome
|
||||||||||||||||||
|
||||||||||||||||||
async buildUrlMethod(): Promise<string> { | ||||||||||||||||||
return "predict"; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
formatData( | ||||||||||||||||||
input: GoogleEmbeddingsInstance[], | ||||||||||||||||||
parameters: GoogleAIModelRequestParams | ||||||||||||||||||
): unknown { | ||||||||||||||||||
return { | ||||||||||||||||||
instances: input, | ||||||||||||||||||
parameters, | ||||||||||||||||||
}; | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Defines the parameters required to initialize a | ||||||||||||||||||
* GoogleEmbeddings instance. It extends EmbeddingsParams and | ||||||||||||||||||
* GoogleConnectionParams. | ||||||||||||||||||
*/ | ||||||||||||||||||
export interface BaseGoogleEmbeddingsParams<AuthOptions> | ||||||||||||||||||
extends EmbeddingsParams, | ||||||||||||||||||
GoogleConnectionParams<AuthOptions> { | ||||||||||||||||||
model: string; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Defines additional options specific to the | ||||||||||||||||||
* GoogleEmbeddingsInstance. It extends AsyncCallerCallOptions. | ||||||||||||||||||
*/ | ||||||||||||||||||
export interface BaseGoogleEmbeddingsOptions extends AsyncCallerCallOptions {} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Represents an instance for generating embeddings using the Google | ||||||||||||||||||
* Vertex AI API. It contains the content to be embedded. | ||||||||||||||||||
*/ | ||||||||||||||||||
export interface GoogleEmbeddingsInstance { | ||||||||||||||||||
content: string; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Defines the structure of the embeddings results returned by the Google | ||||||||||||||||||
* Vertex AI API. It extends GoogleBasePrediction and contains the | ||||||||||||||||||
* embeddings and their statistics. | ||||||||||||||||||
*/ | ||||||||||||||||||
export interface BaseGoogleEmbeddingsResults { | ||||||||||||||||||
embeddings: { | ||||||||||||||||||
statistics: { | ||||||||||||||||||
token_count: number; | ||||||||||||||||||
truncated: boolean; | ||||||||||||||||||
}; | ||||||||||||||||||
values: number[]; | ||||||||||||||||||
}; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Enables calls to the Google Cloud's Vertex AI API to access | ||||||||||||||||||
* the embeddings generated by Large Language Models. | ||||||||||||||||||
* | ||||||||||||||||||
* To use, you will need to have one of the following authentication | ||||||||||||||||||
* methods in place: | ||||||||||||||||||
* - You are logged into an account permitted to the Google Cloud project | ||||||||||||||||||
* using Vertex AI. | ||||||||||||||||||
* - You are running this on a machine using a service account permitted to | ||||||||||||||||||
* the Google Cloud project using Vertex AI. | ||||||||||||||||||
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the | ||||||||||||||||||
* path of a credentials file for a service account permitted to the | ||||||||||||||||||
* Google Cloud project using Vertex AI. | ||||||||||||||||||
* @example | ||||||||||||||||||
* ```typescript | ||||||||||||||||||
* const model = new GoogleEmbeddings(); | ||||||||||||||||||
* const res = await model.embedQuery( | ||||||||||||||||||
* "What would be a good company name for a company that makes colorful socks?" | ||||||||||||||||||
* ); | ||||||||||||||||||
* console.log({ res }); | ||||||||||||||||||
* ``` | ||||||||||||||||||
*/ | ||||||||||||||||||
export abstract class BaseGoogleEmbeddings<AuthOptions> | ||||||||||||||||||
extends Embeddings | ||||||||||||||||||
implements BaseGoogleEmbeddingsParams<AuthOptions> | ||||||||||||||||||
{ | ||||||||||||||||||
model: string; | ||||||||||||||||||
|
||||||||||||||||||
private connection: GoogleAIConnection< | ||||||||||||||||||
BaseGoogleEmbeddingsOptions, | ||||||||||||||||||
GoogleEmbeddingsInstance[], | ||||||||||||||||||
GoogleConnectionParams<AuthOptions> | ||||||||||||||||||
>; | ||||||||||||||||||
|
||||||||||||||||||
constructor(fields: BaseGoogleEmbeddingsParams<AuthOptions>) { | ||||||||||||||||||
super(fields); | ||||||||||||||||||
|
||||||||||||||||||
this.model = fields.model; | ||||||||||||||||||
this.connection = new EmbeddingsConnection( | ||||||||||||||||||
{ ...fields, ...this }, | ||||||||||||||||||
this.caller, | ||||||||||||||||||
this.buildClient(fields), | ||||||||||||||||||
false | ||||||||||||||||||
); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
abstract buildAbstractedClient( | ||||||||||||||||||
fields?: GoogleConnectionParams<AuthOptions> | ||||||||||||||||||
): GoogleAbstractedClient; | ||||||||||||||||||
|
||||||||||||||||||
buildApiKeyClient(apiKey: string): GoogleAbstractedClient { | ||||||||||||||||||
return new ApiKeyGoogleAuth(apiKey); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
buildApiKey( | ||||||||||||||||||
fields?: GoogleConnectionParams<AuthOptions> | ||||||||||||||||||
): string | undefined { | ||||||||||||||||||
return fields?.apiKey ?? getEnvironmentVariable("GOOGLE_API_KEY"); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
buildClient( | ||||||||||||||||||
fields?: GoogleConnectionParams<AuthOptions> | ||||||||||||||||||
): GoogleAbstractedClient { | ||||||||||||||||||
const apiKey = this.buildApiKey(fields); | ||||||||||||||||||
if (apiKey) { | ||||||||||||||||||
return this.buildApiKeyClient(apiKey); | ||||||||||||||||||
} else { | ||||||||||||||||||
return this.buildAbstractedClient(fields); | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Takes an array of documents as input and returns a promise that | ||||||||||||||||||
* resolves to a 2D array of embeddings for each document. It splits the | ||||||||||||||||||
* documents into chunks and makes requests to the Google Vertex AI API to | ||||||||||||||||||
* generate embeddings. | ||||||||||||||||||
* @param documents An array of documents to be embedded. | ||||||||||||||||||
* @returns A promise that resolves to a 2D array of embeddings for each document. | ||||||||||||||||||
*/ | ||||||||||||||||||
async embedDocuments(documents: string[]): Promise<number[][]> { | ||||||||||||||||||
const instanceChunks: GoogleEmbeddingsInstance[][] = chunkArray( | ||||||||||||||||||
documents.map((document) => ({ | ||||||||||||||||||
content: document, | ||||||||||||||||||
})), | ||||||||||||||||||
5 | ||||||||||||||||||
); // Vertex AI accepts max 5 instances per prediction | ||||||||||||||||||
const parameters = {}; | ||||||||||||||||||
const options = {}; | ||||||||||||||||||
const responses = await Promise.all( | ||||||||||||||||||
instanceChunks.map((instances) => | ||||||||||||||||||
this.connection.request(instances, parameters, options) | ||||||||||||||||||
) | ||||||||||||||||||
); | ||||||||||||||||||
const result: number[][] = | ||||||||||||||||||
responses | ||||||||||||||||||
?.map( | ||||||||||||||||||
(response) => | ||||||||||||||||||
(response?.data as any)?.predictions?.map( | ||||||||||||||||||
(result: any) => result.embeddings.values | ||||||||||||||||||
) ?? [] | ||||||||||||||||||
) | ||||||||||||||||||
.flat() ?? []; | ||||||||||||||||||
return result; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
/** | ||||||||||||||||||
* Takes a document as input and returns a promise that resolves to an | ||||||||||||||||||
* embedding for the document. It calls the embedDocuments method with the | ||||||||||||||||||
* document as the input. | ||||||||||||||||||
* @param document A document to be embedded. | ||||||||||||||||||
* @returns A promise that resolves to an embedding for the document. | ||||||||||||||||||
*/ | ||||||||||||||||||
async embedQuery(document: string): Promise<number[]> { | ||||||||||||||||||
const data = await this.embedDocuments([document]); | ||||||||||||||||||
return data[0]; | ||||||||||||||||||
} | ||||||||||||||||||
Comment on lines
+198
to
+201
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the […] (the remainder of this review comment is truncated in the captured page)
|
||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,39 @@ | ||||||||
import { | ||||||||
GoogleAbstractedClient, | ||||||||
GoogleConnectionParams, | ||||||||
BaseGoogleEmbeddings, | ||||||||
BaseGoogleEmbeddingsParams, | ||||||||
} from "@langchain/google-common"; | ||||||||
import { GoogleAuthOptions } from "google-auth-library"; | ||||||||
import { GAuthClient } from "./auth.js"; | ||||||||
|
||||||||
/** | ||||||||
* Input to LLM class. | ||||||||
*/ | ||||||||
export interface GoogleEmbeddingsInput | ||||||||
extends BaseGoogleEmbeddingsParams<GoogleAuthOptions> {} | ||||||||
|
||||||||
/** | ||||||||
* Integration with an LLM. | ||||||||
*/ | ||||||||
export class GoogleEmbeddings | ||||||||
extends BaseGoogleEmbeddings<GoogleAuthOptions> | ||||||||
implements GoogleEmbeddingsInput | ||||||||
{ | ||||||||
// Used for tracing, replace with the same name as your class | ||||||||
static lc_name() { | ||||||||
return "GoogleEmbeddings"; | ||||||||
} | ||||||||
|
||||||||
lc_serializable = true; | ||||||||
|
||||||||
constructor(fields: GoogleEmbeddingsInput) { | ||||||||
super(fields); | ||||||||
} | ||||||||
Comment on lines
+30
to
+32
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Remove the unnecessary constructor. The constructor in `GoogleEmbeddings` only forwards its argument to `super` and can be removed:
```diff
- constructor(fields: GoogleEmbeddingsInput) {
-   super(fields);
- }
```
Committable suggestion
Suggested change
ToolsBiome
|
||||||||
|
||||||||
buildAbstractedClient( | ||||||||
fields?: GoogleConnectionParams<GoogleAuthOptions> | ||||||||
): GoogleAbstractedClient { | ||||||||
return new GAuthClient(fields); | ||||||||
} | ||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from "./chat_models.js"; | ||
export * from "./llms.js"; | ||
export * from "./embeddings.js"; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { | ||
type GoogleEmbeddingsInput, | ||
GoogleEmbeddings, | ||
} from "@langchain/google-webauth"; | ||
|
||
/** | ||
* Input to chat model class. | ||
*/ | ||
export interface GoogleVertexAIEmbeddingsInput extends GoogleEmbeddingsInput {} | ||
|
||
/** | ||
* Integration with a chat model. | ||
*/ | ||
export class GoogleVertexAIEmbeddings extends GoogleEmbeddings { | ||
Comment on lines
+11
to
+14
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Update the class description. The class description inaccurately states "Integration with a chat model." It should be updated to reflect that this class is for embeddings, such as "Integration with Google Vertex AI for embeddings." | ||
||
static lc_name() { | ||
return "GoogleVertexAIEmbeddings"; | ||
} | ||
|
||
constructor(fields: GoogleVertexAIEmbeddingsInput) { | ||
super({ | ||
...fields, | ||
platformType: "gcp", | ||
}); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
export * from "./chat_models.js"; | ||
export * from "./llms.js"; | ||
export * from "./embeddings.js"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The class `GoogleVertexAIEmbeddings` has been marked as deprecated, which is good. However, to ensure a smooth transition for users, it would be helpful to provide more detailed information in the deprecation notice. Consider adding a brief explanation of why the class is being deprecated and a code example showing how to use the new import location. This will help users understand the change and quickly update their code.