Skip to content

Commit

Permalink
embeddings: use EmbedderClient for ernie embeddings (#363)
Browse files Browse the repository at this point in the history
Also removes an unused internal method.

For #356
  • Loading branch information
eliben authored and tmc committed Dec 5, 2023
1 parent 7ad3f13 commit 1702d73
Showing 1 changed file with 3 additions and 49 deletions.
52 changes: 3 additions & 49 deletions embeddings/ernie/ernie.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"

"github.com/tmc/langchaingo/embeddings"
"github.com/tmc/langchaingo/embeddings/internal/embedderclient"
"github.com/tmc/langchaingo/llms/ernie"
)

Expand Down Expand Up @@ -40,57 +41,10 @@ func NewErnie(opts ...Option) (*Ernie, error) {
return v, nil
}

// embed sends texts to the ernie client in consecutive chunks of at most
// e.batchCount entries and returns the per-chunk results concatenated in
// request order.
//
// Only non-empty chunks are sent: an empty texts slice yields an empty result
// without contacting the client, and an input whose length is an exact
// multiple of e.batchCount does not trigger a trailing request carrying zero
// texts. A non-positive e.batchCount is treated as "no limit" (a single
// request for all texts) instead of dividing by zero.
//
// The first error returned by the client stops the loop and is returned
// unchanged together with a nil result.
func (e *Ernie) embed(ctx context.Context, texts []string) ([][]float32, error) {
	emb := make([][]float32, 0, len(texts))

	// Guard the step so the loop below always makes progress.
	step := e.batchCount
	if step <= 0 {
		step = len(texts)
	}

	for start := 0; start < len(texts); start += step {
		end := start + step
		if end > len(texts) {
			end = len(texts)
		}

		curTextEmbeddings, err := e.client.CreateEmbedding(ctx, texts[start:end])
		if err != nil {
			return nil, err
		}

		emb = append(emb, curTextEmbeddings...)
	}
	return emb, nil
}

// EmbedDocuments uses ernie Embedding-V1.
//
// The input texts are first passed through embeddings.MaybeRemoveNewLines
// under control of e.stripNewLines; the resulting texts are then handed to
// embedderclient.BatchedEmbed together with e.client and e.batchSize, and its
// result and error are returned unchanged.
//
// NOTE(review): this path does not go through e.embed, so e.batchCount is not
// applied here — confirm e.batchSize respects the service's per-request limit.
func (e *Ernie) EmbedDocuments(ctx context.Context, texts []string) ([][]float32, error) {
	texts = embeddings.MaybeRemoveNewLines(texts, e.stripNewLines)
	return embedderclient.BatchedEmbed(ctx, e.client, texts, e.batchSize)
}

// EmbedQuery uses ernie Embedding-V1.
Expand Down

0 comments on commit 1702d73

Please sign in to comment.