Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

add: pgvector vectorstore #140

Merged
merged 3 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
services:
  pgvector:
    container_name: pgvector
    image: pgvector/pgvector:0.7.4-pg17
    ports:
      - "5432:5432"
    environment:
      POSTGRES_DB: knowledge
      POSTGRES_USER: knowledge
      POSTGRES_PASSWORD: knowledge
12 changes: 8 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
github.com/google/uuid v1.6.0
github.com/hupe1980/golc v0.0.112
github.com/iwilltry42/bm25-go v0.0.0-20240909111832-a928590cc9da
github.com/jackc/pgx/v5 v5.7.1
github.com/jmcarbo/stopwords v1.1.9
github.com/joho/godotenv v1.5.1
github.com/knadh/koanf/parsers/json v0.1.0
Expand All @@ -38,14 +39,15 @@ require (
github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06
github.com/lu4p/cat v0.1.5
github.com/mitchellh/mapstructure v1.5.0
github.com/pgvector/pgvector-go v0.2.2
github.com/philippgille/chromem-go v0.6.1-0.20240811154507-a1944285b284
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
github.com/swaggo/files v1.0.1
github.com/swaggo/gin-swagger v1.6.0
github.com/swaggo/swag v1.16.3
github.com/tmc/langchaingo v0.1.12
golang.org/x/sync v0.7.0
golang.org/x/sync v0.8.0
gorm.io/gorm v1.25.10
sigs.k8s.io/yaml v1.4.0
)
Expand Down Expand Up @@ -109,6 +111,8 @@ require (
github.com/hupe1980/go-textractor v0.0.9 // indirect
github.com/hupe1980/go-tiktoken v0.0.9 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
Expand Down Expand Up @@ -158,11 +162,11 @@ require (
gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect
gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/crypto v0.27.0 // indirect
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
golang.org/x/tools v0.22.0 // indirect
google.golang.org/api v0.184.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 // indirect
Expand Down
183 changes: 173 additions & 10 deletions go.sum

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/cmd/askdir.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (s *ClientAskDir) Customize(cmd *cobra.Command) {
}

func (s *ClientAskDir) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/cmd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"archive/zip"
"context"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -89,13 +90,13 @@ func (s *Client) loadArchive() error {
return fmt.Errorf("knowledge archive must contain exactly one .db and one .gob file")
}

s.DSN = types.ArchivePrefix + dbFile
s.VectorDBPath = types.ArchivePrefix + vectorStoreFile
s.DatabaseConfig.DSN = types.ArchivePrefix + dbFile
s.VectorDBConfig.DSN = types.ArchivePrefix + vectorStoreFile

return nil
}

func (s *Client) getClient() (client.Client, error) {
func (s *Client) getClient(ctx context.Context) (client.Client, error) {
if err := s.loadArchive(); err != nil {
return nil, err
}
Expand All @@ -111,7 +112,7 @@ func (s *Client) getClient() (client.Client, error) {
return nil, err
}

ds, err := datastore.NewDatastore(s.DSN, s.AutoMigrate == "true", s.VectorDBConfig.VectorDBPath, provider)
ds, err := datastore.NewDatastore(ctx, s.DatabaseConfig.DSN, s.AutoMigrate == "true", s.VectorDBConfig.DSN, provider)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/create_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (s *ClientCreateDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientCreateDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/delete_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (s *ClientDeleteDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientDeleteDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/edit_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/index"
"github.com/spf13/cobra"
Expand All @@ -23,7 +24,7 @@ func (s *ClientEditDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientEditDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -17,7 +18,7 @@ func (s *ClientExportDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientExportDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/get_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -19,7 +20,7 @@ func (s *ClientGetDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientGetDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func (s *ClientImportDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientImportDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ This is a constraint of the Vector Database and Similarity Search, as different
}

func (s *ClientIngest) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/list_datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -18,7 +19,7 @@ func (s *ClientListDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientListDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
1 change: 0 additions & 1 deletion pkg/cmd/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ func (s *ClientLoad) Run(cmd *cobra.Command, args []string) error {

var texts []string
for _, doc := range docs {

if len(doc.Content) == 0 {
continue
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (s *ClientResetDatastore) Customize(cmd *cobra.Command) {
}

func (s *ClientResetDatastore) Run(cmd *cobra.Command, args []string) error {
dsn, vectordbPath, _, err := datastore.GetDatastorePaths(s.DSN, s.VectorDBConfig.VectorDBPath)
dsn, vectordbPath, _, err := datastore.GetDefaultDSNs(s.DatabaseConfig.DSN, s.VectorDBConfig.DSN)
if err != nil {
return err
}
Expand All @@ -34,6 +34,6 @@ func (s *ClientResetDatastore) Run(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to remove vector database directory: %w", err)
}

fmt.Printf("Successfully reset datastore (DSN: %q, VectorDBPath: %q)\n", dsn, vectordbPath)
fmt.Printf("Successfully reset datastore (DSN: %q, DSN: %q)\n", dsn, vectordbPath)
return nil
}
2 changes: 1 addition & 1 deletion pkg/cmd/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (s *ClientRetrieve) Run(cmd *cobra.Command, args []string) error {
}
slog.Info("Retrieving sources for query", "query", query, "datasets", datasetIDs)

c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ package cmd

import (
"fmt"
"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/spf13/cobra"
"log/slog"
"os/signal"
"syscall"

"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/spf13/cobra"

"github.com/gptscript-ai/knowledge/pkg/config"
"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/server"
Expand Down Expand Up @@ -47,7 +48,7 @@ func (s *Server) Run(cmd *cobra.Command, _ []string) error {
return err
}

ds, err := datastore.NewDatastore(s.DSN, s.AutoMigrate == "true", s.VectorDBConfig.VectorDBPath, provider)
ds, err := datastore.NewDatastore(cmd.Context(), s.DatabaseConfig.DSN, s.AutoMigrate == "true", s.VectorDBConfig.DSN, provider)
if err != nil {
return fmt.Errorf("failed to initialize datastore: %w", err)
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ package config

import (
"fmt"
"os"
"path"

"github.com/knadh/koanf/parsers/json"
"github.com/knadh/koanf/parsers/yaml"
"github.com/knadh/koanf/providers/rawbytes"
"github.com/knadh/koanf/v2"
"os"
"path"
)

type Config struct {
Expand All @@ -25,12 +26,12 @@ type ModelProviderConfig struct {
}

type DatabaseConfig struct {
DSN string `usage:"Server database connection string (default \"sqlite://$XDG_DATA_HOME/gptscript/knowledge/knowledge.db\")" default:"" env:"KNOW_DB_DSN"`
DSN string `name:"index-dsn" usage:"Index Database Connection string (relational DB) (default \"sqlite://$XDG_DATA_HOME/gptscript/knowledge/knowledge.db\")" default:"" env:"KNOW_INDEX_DSN"`
AutoMigrate string `usage:"Auto migrate database" default:"true" env:"KNOW_DB_AUTO_MIGRATE"`
}

type VectorDBConfig struct {
VectorDBPath string `usage:"VectorDBPath to the vector database (default \"$XDG_DATA_HOME/gptscript/knowledge/vector.db\")" default:"" env:"KNOW_VECTOR_DB_PATH"`
DSN string `name:"vector-dsn" usage:"DSN to the vector database (default \"chromem:$XDG_DATA_HOME/gptscript/knowledge/vector.db\")" default:"" env:"KNOW_VECTOR_DSN"`
}

func LoadConfig(configFile string) (*Config, error) {
Expand Down
Loading
Loading