Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
feat: add progress indication for embedding generation process (#127)
Browse files Browse the repository at this point in the history
  • Loading branch information
iwilltry42 authored Sep 19, 2024
1 parent 09fe78a commit bd17366
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 4 deletions.
8 changes: 6 additions & 2 deletions pkg/client/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,12 @@ func ingestPaths(ctx context.Context, c Client, opts *IngestPathsOpts, datasetID
defer sem.Release(1)

ingestedFilesCount++
currentMetadata := metadataStack[len(metadataStack)-1]
return ingestionFunc(path, currentMetadata.Metadata[filepath.Base(path)]) // FIXME: metadata
var fileMetadata FileMetadata
if len(metadataStack) > 0 {
currentMetadata := metadataStack[len(metadataStack)-1]
fileMetadata = currentMetadata.Metadata[filepath.Base(path)]
}
return ingestionFunc(path, fileMetadata)
})
}

Expand Down
20 changes: 19 additions & 1 deletion pkg/datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/gptscript-ai/knowledge/pkg/config"
etypes "github.com/gptscript-ai/knowledge/pkg/datastore/embeddings/types"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
"github.com/gptscript-ai/knowledge/pkg/log"
"github.com/gptscript-ai/knowledge/pkg/output"

"github.com/adrg/xdg"
Expand Down Expand Up @@ -68,6 +69,23 @@ func GetDatastorePaths(dsn, vectordbPath string) (string, string, bool, error) {
return dsn, vectordbPath, isArchive, nil
}

// LogEmbeddingFunc decorates embeddingFunc with progress logging.
//
// The returned function fetches the logger carried by ctx via log.FromCtx,
// tags it with stage=embedding and then emits one record per step:
// status=starting before the wrapped call, and afterwards either
// status=completed (info) or status=failed (error, with the error attached).
// The embedding and error produced by embeddingFunc are passed through
// unchanged, except that a nil slice is returned alongside a non-nil error.
func LogEmbeddingFunc(embeddingFunc cg.EmbeddingFunc) cg.EmbeddingFunc {
	return func(ctx context.Context, text string) ([]float32, error) {
		logger := log.FromCtx(ctx).With("stage", "embedding")

		logger.With("status", "starting").Info("Creating embedding")

		vector, embedErr := embeddingFunc(ctx, text)
		if embedErr == nil {
			logger.With("status", "completed").Info("Created embedding")
			return vector, nil
		}

		// Report the failure for progress tracking, then hand the original
		// error back to the caller untouched.
		logger.With("status", "failed").Error("Failed to create embedding", "error", embedErr)
		return nil, embedErr
	}
}

func NewDatastore(dsn string, automigrate bool, vectorDBPath string, embeddingProvider etypes.EmbeddingModelProvider) (*Datastore, error) {
dsn, vectorDBPath, isArchive, err := GetDatastorePaths(dsn, vectorDBPath)
if err != nil {
Expand Down Expand Up @@ -106,7 +124,7 @@ func NewDatastore(dsn string, automigrate bool, vectorDBPath string, embeddingPr

ds := &Datastore{
Index: idx,
Vectorstore: chromem.New(vsdb, embeddingFunc),
Vectorstore: chromem.New(vsdb, LogEmbeddingFunc(embeddingFunc)),
EmbeddingModelProvider: embeddingProvider,
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/datastore/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (s *Datastore) Ingest(ctx context.Context, datasetID string, name string, c
// Add documents to VectorStore -> This generates the embeddings
slog.Debug("Ingesting documents", "count", len(docs))

log.ToCtx(ctx, log.FromCtx(ctx).With("phase", "store").With("num_documents", len(docs)))
ctx = log.ToCtx(ctx, log.FromCtx(ctx).With("phase", "store").With("num_documents", len(docs)))

docIDs, err := s.Vectorstore.AddDocuments(ctx, docs, datasetID)
if err != nil {
Expand Down

0 comments on commit bd17366

Please sign in to comment.