diff --git a/changelog/unreleased/enhancement-search.md b/changelog/unreleased/enhancement-search.md new file mode 100644 index 00000000000..699b81871b4 --- /dev/null +++ b/changelog/unreleased/enhancement-search.md @@ -0,0 +1,8 @@ +Enhancement: Improve search + +Provides multiple enhancements to the current search implementation. +* content extraction, search now supports Apache Tika to extract resource contents. +* search engine, underlying search engine is swappable now. +* event consumers, the number of event consumers can now be set, which improves the speed of the individual tasks. + +https://github.com/owncloud/ocis/pull/5221 diff --git a/go.mod b/go.mod index d9ab4a53813..dfc32031bc2 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/MicahParks/keyfunc v1.5.1 github.com/armon/go-radix v1.0.0 + github.com/bbalet/stopwords v1.0.0 github.com/blevesearch/bleve/v2 v2.3.5 github.com/coreos/go-oidc/v3 v3.4.0 github.com/cs3org/go-cs3apis v0.0.0-20221012090518-ef2996678965 @@ -36,6 +37,7 @@ require ( github.com/gofrs/uuid v4.3.1+incompatible github.com/golang-jwt/jwt/v4 v4.4.2 github.com/golang/protobuf v1.5.2 + github.com/google/go-tika v0.2.0 github.com/gookit/config/v2 v2.1.8 github.com/gorilla/mux v1.8.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.13.0 diff --git a/go.sum b/go.sum index 8d6ea82f65c..fa50ac66427 100644 --- a/go.sum +++ b/go.sum @@ -220,6 +220,8 @@ github.com/aws/aws-sdk-go v1.43.11/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4 github.com/aws/aws-sdk-go v1.44.114/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go v1.44.122 h1:p6mw01WBaNpbdP2xrisz5tIkcNwzj/HysobNoaAHjgo= github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= +github.com/bbalet/stopwords v1.0.0 h1:0TnGycCtY0zZi4ltKoOGRFIlZHv0WqpoIGUsObjztfo= +github.com/bbalet/stopwords v1.0.0/go.mod h1:sAWrQoDMfqARGIn4s6dp7OW7ISrshUD8IP2q3KoqPjc= github.com/beevik/etree v1.1.0 
h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs= github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -661,6 +663,8 @@ github.com/google/go-github/v32 v32.1.0/go.mod h1:rIEpZD9CTDQwDK9GDrtMTycQNA4JU3 github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/go-tika v0.2.0 h1:+1dnOoJ/pJrko2XH/3Rm5ssG9+ixOgjmPEz94ikUsxI= +github.com/google/go-tika v0.2.0/go.mod h1:vnMADwNG1A2AJx+ycQgTNMGe3ZG4CZUowEhK2FykumQ= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= @@ -1451,6 +1455,7 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= diff --git a/services/search/Makefile b/services/search/Makefile index f8a3b5a8c01..a4370c91dda 100644 
--- a/services/search/Makefile +++ b/services/search/Makefile @@ -25,8 +25,10 @@ include ../../.make/generate.mk .PHONY: ci-go-generate ci-go-generate: $(MOCKERY) # CI runs ci-node-generate automatically before this target - $(MOCKERY) --dir pkg/search --output pkg/search/mocks --case underscore --name IndexClient - $(MOCKERY) --dir pkg/search --output pkg/search/mocks --case underscore --name ProviderClient + $(MOCKERY) --dir pkg/engine --output pkg/engine/mocks --case underscore --name Engine + $(MOCKERY) --dir pkg/content --output pkg/content/mocks --case underscore --name Extractor + $(MOCKERY) --dir pkg/content --output pkg/content/mocks --case underscore --name Retriever + $(MOCKERY) --dir pkg/search --output pkg/search/mocks --case underscore --name Searcher .PHONY: ci-node-generate ci-node-generate: diff --git a/services/search/pkg/command/server.go b/services/search/pkg/command/server.go index 789964bd0f2..9dc3f11100a 100644 --- a/services/search/pkg/command/server.go +++ b/services/search/pkg/command/server.go @@ -50,13 +50,18 @@ func Server(cfg *config.Config) *cli.Command { mtrcs := metrics.New() mtrcs.BuildInfo.WithLabelValues(version.GetString()).Set(1) - grpcServer := grpc.Server( + grpcServer, teardown, err := grpc.Server( grpc.Config(cfg), grpc.Logger(logger), grpc.Name(cfg.Service.Name), grpc.Context(ctx), grpc.Metrics(mtrcs), ) + defer teardown() + if err != nil { + logger.Info().Err(err).Str("transport", "grpc").Msg("Failed to initialize server") + return err + } gr.Add(grpcServer.Run, func(_ error) { logger.Error(). 
@@ -71,7 +76,6 @@ func Server(cfg *config.Config) *cli.Command { debug.Context(ctx), debug.Config(cfg), ) - if err != nil { logger.Info().Err(err).Str("transport", "debug").Msg("Failed to initialize server") return err diff --git a/services/search/pkg/config/config.go b/services/search/pkg/config/config.go index 1f463c0f562..83dd9f0ca3c 100644 --- a/services/search/pkg/config/config.go +++ b/services/search/pkg/config/config.go @@ -18,24 +18,13 @@ type Config struct { GRPC GRPCConfig `yaml:"grpc"` - Datapath string `yaml:"data_path" env:"SEARCH_DATA_PATH" desc:"The directory where the filesystem storage will store search data. If not definied, the root directory derives from $OCIS_BASE_DATA_PATH:/search."` - DebounceDuration int `yaml:"debounce_duration" env:"SEARCH_REINDEX_DEBOUNCE_DURATION" desc:"The duration in milliseconds the reindex debouncer waits before triggering a reindex of a space that was modified."` - Reva *shared.Reva `yaml:"reva"` GRPCClientTLS *shared.GRPCClientTLS `yaml:"grpc_client_tls"` Events Events `yaml:"events"` + Engine Engine `yaml:"engine"` + Extractor Extractor `yaml:"extractor"` MachineAuthAPIKey string `yaml:"machine_auth_api_key" env:"OCIS_MACHINE_AUTH_API_KEY;SEARCH_MACHINE_AUTH_API_KEY" desc:"Machine auth API key used to validate internal requests necessary for the access to resources from other services."` Context context.Context `yaml:"-"` } - -// Events combines the configuration options for the event bus. -type Events struct { - Endpoint string `yaml:"endpoint" env:"SEARCH_EVENTS_ENDPOINT" desc:"The address of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture."` - Cluster string `yaml:"cluster" env:"SEARCH_EVENTS_CLUSTER" desc:"The clusterID of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture. 
Mandatory when using NATS as event system."` - ConsumerGroup string `yaml:"group" env:"SEARCH_EVENTS_GROUP" desc:"The customer group of the service. One group will only get one copy of an event"` - TLSInsecure bool `yaml:"tls_insecure" env:"OCIS_INSECURE;SEARCH_EVENTS_TLS_INSECURE" desc:"Whether to verify the server TLS certificates."` - TLSRootCACertificate string `yaml:"tls_root_ca_certificate" env:"SEARCH_EVENTS_TLS_ROOT_CA_CERTIFICATE" desc:"The root CA certificate used to validate the server's TLS certificate. If provided SEARCH_EVENTS_TLS_INSECURE will be seen as false."` - EnableTLS bool `yaml:"enable_tls" env:"OCIS_EVENTS_ENABLE_TLS;SEARCH_EVENTS_ENABLE_TLS" desc:"Enable TLS for the connection to the events broker. The events broker is the ocis service which receives and delivers events between the services.."` -} diff --git a/services/search/pkg/config/content.go b/services/search/pkg/config/content.go new file mode 100644 index 00000000000..dae39636482 --- /dev/null +++ b/services/search/pkg/config/content.go @@ -0,0 +1,13 @@ +package config + +// Extractor defines which extractor to use +type Extractor struct { + Type string `yaml:"type" env:"SEARCH_EXTRACTOR_TYPE" desc:"Defines the content extraction engine."` + CS3AllowInsecure bool `yaml:"cs3_allow_insecure" env:"OCIS_INSECURE;SEARCH_EXTRACTOR_CS3SOURCE_INSECURE" desc:"Ignore untrusted SSL certificates when connecting to the CS3 source."` + Tika ExtractorTika `yaml:"tika"` +} + +// ExtractorTika configures the Tika extractor +type ExtractorTika struct { + TikaURL string `yaml:"tika_url" env:"SEARCH_EXTRACTOR_TIKA_TIKA_URL" desc:"URL of the tika server."` +} diff --git a/services/search/pkg/config/defaults/defaultconfig.go b/services/search/pkg/config/defaults/defaultconfig.go index ac4604e0d64..7fb9ef946fd 100644 --- a/services/search/pkg/config/defaults/defaultconfig.go +++ b/services/search/pkg/config/defaults/defaultconfig.go @@ -1,7 +1,7 @@ package defaults import ( - "path" + "path/filepath" 
"github.com/owncloud/ocis/v2/ocis-pkg/config/defaults" "github.com/owncloud/ocis/v2/ocis-pkg/shared" @@ -29,14 +29,26 @@ func DefaultConfig() *config.Config { Service: config.Service{ Name: "search", }, - Datapath: path.Join(defaults.BaseDataPath(), "search"), - DebounceDuration: 1000, - Reva: shared.DefaultRevaConfig(), + Reva: shared.DefaultRevaConfig(), + Engine: config.Engine{ + Type: "bleve", + Bleve: config.EngineBleve{ + Datapath: filepath.Join(defaults.BaseDataPath(), "search"), + }, + }, + Extractor: config.Extractor{ + Type: "basic", + CS3AllowInsecure: false, + Tika: config.ExtractorTika{ + TikaURL: "http://127.0.0.1:9998", + }, + }, Events: config.Events{ - Endpoint: "127.0.0.1:9233", - Cluster: "ocis-cluster", - ConsumerGroup: "search", - EnableTLS: false, + Endpoint: "127.0.0.1:9233", + Cluster: "ocis-cluster", + DebounceDuration: 1000, + AsyncUploads: false, + EnableTLS: false, }, MachineAuthAPIKey: "", } diff --git a/services/search/pkg/config/engine.go b/services/search/pkg/config/engine.go new file mode 100644 index 00000000000..39aa98fbbef --- /dev/null +++ b/services/search/pkg/config/engine.go @@ -0,0 +1,12 @@ +package config + +// Engine defines which search engine to use +type Engine struct { + Type string `yaml:"type" env:"SEARCH_ENGINE_TYPE" desc:"Defines which search engine to use."` + Bleve EngineBleve `yaml:"bleve"` +} + +// EngineBleve configures the bleve engine +type EngineBleve struct { + Datapath string `yaml:"data_path" env:"SEARCH_ENGINE_BLEVE_DATA_PATH" desc:"Path for the search persistence directory."` +} diff --git a/services/search/pkg/config/reva.go b/services/search/pkg/config/reva.go new file mode 100644 index 00000000000..f0c218ad806 --- /dev/null +++ b/services/search/pkg/config/reva.go @@ -0,0 +1,6 @@ +package config + +// Reva defines all available REVA configuration. 
+type Reva struct { + Address string `ocisConfig:"address" env:"REVA_GATEWAY" desc:"The CS3 gateway endpoint."` +} diff --git a/services/search/pkg/config/search.go b/services/search/pkg/config/search.go new file mode 100644 index 00000000000..cbed11c1676 --- /dev/null +++ b/services/search/pkg/config/search.go @@ -0,0 +1,14 @@ +package config + +// Events combines the configuration options for the event bus. +type Events struct { + Endpoint string `yaml:"endpoint" env:"SEARCH_EVENTS_ENDPOINT" desc:"The address of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture."` + Cluster string `yaml:"cluster" env:"SEARCH_EVENTS_CLUSTER" desc:"The clusterID of the event system. The event system is the message queuing service. It is used as message broker for the microservice architecture. Mandatory when using NATS as event system."` + AsyncUploads bool `yaml:"async_uploads" env:"STORAGE_USERS_OCIS_ASYNC_UPLOADS;SEARCH_EVENTS_ASYNC_UPLOADS" desc:"Enable asynchronous file uploads."` + NumConsumers int `yaml:"num_consumers" env:"SEARCH_EVENTS_NUM_CONSUMERS" desc:"number of event consumers per service instance"` + DebounceDuration int `yaml:"debounce_duration" env:"SEARCH_EVENTS_REINDEX_DEBOUNCE_DURATION" desc:"The duration in milliseconds the reindex debouncer waits before triggering a reindex of a space that was modified."` + + TLSInsecure bool `yaml:"tls_insecure" env:"OCIS_INSECURE;SEARCH_EVENTS_TLS_INSECURE" desc:"Whether to verify the server TLS certificates."` + TLSRootCACertificate string `yaml:"tls_root_ca_certificate" env:"SEARCH_EVENTS_TLS_ROOT_CA_CERTIFICATE" desc:"The root CA certificate used to validate the server's TLS certificate. If provided SEARCH_EVENTS_TLS_INSECURE will be seen as false."` + EnableTLS bool `yaml:"enable_tls" env:"OCIS_EVENTS_ENABLE_TLS;SEARCH_EVENTS_ENABLE_TLS" desc:"Enable TLS for the connection to the events broker. 
The events broker is the ocis service which receives and delivers events between the services.."` +} diff --git a/services/search/pkg/content/basic.go b/services/search/pkg/content/basic.go new file mode 100644 index 00000000000..d40bb455d12 --- /dev/null +++ b/services/search/pkg/content/basic.go @@ -0,0 +1,41 @@ +package content + +import ( + "context" + "time" + + storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + //"github.com/cs3org/reva/v2/pkg/tags" + "github.com/owncloud/ocis/v2/ocis-pkg/log" +) + +// Basic is the simplest Extractor implementation. +type Basic struct { + logger log.Logger +} + +// NewBasicExtractor creates a new Basic instance. +func NewBasicExtractor(logger log.Logger) (*Basic, error) { + return &Basic{logger: logger}, nil +} + +// Extract literally just rearranges the inputs and processes them into a Document. +func (b Basic) Extract(_ context.Context, ri *storageProvider.ResourceInfo) (Document, error) { + doc := Document{ + Name: ri.Name, + Size: ri.Size, + MimeType: ri.MimeType, + } + + //if m := ri.ArbitraryMetadata.GetMetadata(); m != nil { + //if t, ok := m["tags"]; ok { + //doc.Tags = tags.FromList(t).AsSlice() + //} + //} + + if ri.Mtime != nil { + doc.Mtime = time.Unix(int64(ri.Mtime.Seconds), int64(ri.Mtime.Nanos)).UTC().Format(time.RFC3339) + } + + return doc, nil +} diff --git a/services/search/pkg/content/basic_test.go b/services/search/pkg/content/basic_test.go new file mode 100644 index 00000000000..b7924bf18f8 --- /dev/null +++ b/services/search/pkg/content/basic_test.go @@ -0,0 +1,90 @@ +package content_test + +import ( + "context" + + storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + cs3Types "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + "github.com/owncloud/ocis/v2/services/search/pkg/content" +) + +var _ = Describe("Basic", func() { + var ( + basic content.Extractor + logger = log.NewLogger() + ctx = context.TODO() + ) + + BeforeEach(func() { + basic, _ = content.NewBasicExtractor(logger) + }) + + Describe("extract", func() { + It("basic fields", func() { + ri := &storageProvider.ResourceInfo{ + Name: "bar.pdf", + Path: "./foo/bar.pdf", + Size: 1024, + MimeType: "application/pdf", + } + + doc, err := basic.Extract(ctx, ri) + + Expect(err).To(BeNil()) + Expect(doc).ToNot(BeNil()) + Expect(doc.Name).To(Equal(ri.Name)) + Expect(doc.Size).To(Equal(ri.Size)) + Expect(doc.MimeType).To(Equal(ri.MimeType)) + }) + + /*It("adds tags", func() { + for _, data := range []struct { + tags string + expect []string + }{ + {tags: "", expect: []string{}}, + {tags: ",,,", expect: []string{}}, + {tags: ",foo,,", expect: []string{"foo"}}, + {tags: ",foo,,bar,", expect: []string{"foo", "bar"}}, + } { + ri := &storageProvider.ResourceInfo{ + ArbitraryMetadata: &storageProvider.ArbitraryMetadata{ + Metadata: map[string]string{ + "tags": data.tags, + }, + }, + } + + doc, err := basic.Extract(ctx, ri) + Expect(err).To(BeNil()) + Expect(doc).ToNot(BeNil()) + Expect(doc.Tags).To(Equal(data.expect)) + } + })*/ + + It("RFC3339 mtime", func() { + for _, data := range []struct { + second uint64 + expect string + }{ + {second: 4000, expect: "1970-01-01T01:06:40Z"}, + {second: 3000, expect: "1970-01-01T00:50:00Z"}, + {expect: ""}, + } { + ri := &storageProvider.ResourceInfo{} + + if data.second != 0 { + ri.Mtime = &cs3Types.Timestamp{Seconds: data.second} + } + + doc, err := basic.Extract(ctx, ri) + Expect(err).To(BeNil()) + Expect(doc).ToNot(BeNil()) + Expect(doc.Mtime).To(Equal(data.expect)) + } + }) + }) +}) diff --git a/services/search/pkg/content/content.go b/services/search/pkg/content/content.go new file mode 100644 index 00000000000..11564df5d50 --- 
/dev/null +++ b/services/search/pkg/content/content.go @@ -0,0 +1,13 @@ +package content + +// Document wraps all resource meta fields, +// it is used as a content extraction result. +type Document struct { + Title string + Name string + Content string + Size uint64 + Mtime string + MimeType string + //Tags []string +} diff --git a/services/search/pkg/search/provider/provider_suite_test.go b/services/search/pkg/content/content_suite_test.go similarity index 56% rename from services/search/pkg/search/provider/provider_suite_test.go rename to services/search/pkg/content/content_suite_test.go index 9187db0278c..2d0d7468822 100644 --- a/services/search/pkg/search/provider/provider_suite_test.go +++ b/services/search/pkg/content/content_suite_test.go @@ -1,4 +1,4 @@ -package provider_test +package content_test import ( "testing" @@ -7,7 +7,7 @@ import ( . "github.com/onsi/gomega" ) -func TestProvider(t *testing.T) { +func TestContent(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Provider Suite") + RunSpecs(t, "Content Suite") } diff --git a/services/search/pkg/content/cs3.go b/services/search/pkg/content/cs3.go new file mode 100644 index 00000000000..9d2332fdc82 --- /dev/null +++ b/services/search/pkg/content/cs3.go @@ -0,0 +1,80 @@ +package content + +import ( + "context" + "crypto/tls" + "fmt" + "io" + "net/http" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + revactx "github.com/cs3org/reva/v2/pkg/ctx" + "github.com/owncloud/ocis/v2/ocis-pkg/log" +) + +type cs3 struct { + httpClient http.Client + gwClient gateway.GatewayAPIClient + logger log.Logger +} + +func newCS3Retriever(client gateway.GatewayAPIClient, logger log.Logger, insecure bool) cs3 { + return cs3{ + httpClient: http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}, //nolint:gosec + }, + }, + gwClient: 
client, + logger: logger, + } +} + +// Retrieve downloads the file from a cs3 service +// The caller MUST make sure to close the returned ReadCloser +func (s cs3) Retrieve(ctx context.Context, rID *provider.ResourceId) (io.ReadCloser, error) { + at, ok := contextGet(ctx, revactx.TokenHeader) + if !ok { + return nil, fmt.Errorf("context without %s", revactx.TokenHeader) + } + + res, err := s.gwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{Ref: &provider.Reference{ResourceId: rID, Path: "."}}) + if err != nil { + return nil, err + } + if res.Status.Code != rpc.Code_CODE_OK { + return nil, fmt.Errorf("could not load resoure: %s", res.Status.Message) + } + + var ep, tt string + for _, p := range res.Protocols { + if p.Protocol == "spaces" { + ep, tt = p.DownloadEndpoint, p.Token + break + } + } + if (ep == "" || tt == "") && len(res.Protocols) > 0 { + ep, tt = res.Protocols[0].DownloadEndpoint, res.Protocols[0].Token + } + + req, err := http.NewRequest(http.MethodGet, ep, nil) + if err != nil { + return nil, err + } + + req.Header.Set(revactx.TokenHeader, at) + req.Header.Set("X-Reva-Transfer", tt) + + cres, err := s.httpClient.Do(req) + if err != nil { + return nil, err + } + + if cres.StatusCode != http.StatusOK { + return nil, fmt.Errorf("could not download resource. Request returned with statuscode %d ", cres.StatusCode) + } + + return cres.Body, nil +} diff --git a/services/search/pkg/content/extractor.go b/services/search/pkg/content/extractor.go new file mode 100644 index 00000000000..75f69af16e1 --- /dev/null +++ b/services/search/pkg/content/extractor.go @@ -0,0 +1,33 @@ +package content + +import ( + "context" + "errors" + "fmt" + + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" +) + +// Extractor is responsible to extract content and meta information from documents. 
// getFirstValue returns the first value stored under key in m.
//
// It returns an error when m is nil, when key is not present in m, or when key
// is present but holds no values.
func getFirstValue(m map[string][]string, key string) (string, error) {
	if m == nil {
		return "", errors.New("undefined map")
	}

	v, ok := m[key]
	if !ok {
		return "", fmt.Errorf("unknown key: %v", key)
	}

	// Inspect the value slice, not the map: a key that is present with an empty
	// slice would otherwise panic on v[0] below.
	if len(v) == 0 {
		return "", fmt.Errorf("no values for: %v", key)
	}

	return v[0], nil
}
+func NewExtractor(t mockConstructorTestingTNewExtractor) *Extractor { + mock := &Extractor{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/services/search/pkg/content/mocks/retriever.go b/services/search/pkg/content/mocks/retriever.go new file mode 100644 index 00000000000..bc380c7e690 --- /dev/null +++ b/services/search/pkg/content/mocks/retriever.go @@ -0,0 +1,55 @@ +// Code generated by mockery v2.14.1. DO NOT EDIT. + +package mocks + +import ( + context "context" + io "io" + + mock "github.com/stretchr/testify/mock" + + providerv1beta1 "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" +) + +// Retriever is an autogenerated mock type for the Retriever type +type Retriever struct { + mock.Mock +} + +// Retrieve provides a mock function with given fields: ctx, rID +func (_m *Retriever) Retrieve(ctx context.Context, rID *providerv1beta1.ResourceId) (io.ReadCloser, error) { + ret := _m.Called(ctx, rID) + + var r0 io.ReadCloser + if rf, ok := ret.Get(0).(func(context.Context, *providerv1beta1.ResourceId) io.ReadCloser); ok { + r0 = rf(ctx, rID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(io.ReadCloser) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, *providerv1beta1.ResourceId) error); ok { + r1 = rf(ctx, rID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type mockConstructorTestingTNewRetriever interface { + mock.TestingT + Cleanup(func()) +} + +// NewRetriever creates a new instance of Retriever. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
+func NewRetriever(t mockConstructorTestingTNewRetriever) *Retriever { + mock := &Retriever{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/services/search/pkg/content/retriever.go b/services/search/pkg/content/retriever.go new file mode 100644 index 00000000000..1548a5bb9ca --- /dev/null +++ b/services/search/pkg/content/retriever.go @@ -0,0 +1,31 @@ +package content + +import ( + "context" + "io" + + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "google.golang.org/grpc/metadata" +) + +// Retriever is the interface that wraps the basic Retrieve method. 🐕 +// It requests and then returns a resource from the underlying storage. +// +//go:generate mockery --name=Retriever +type Retriever interface { + Retrieve(ctx context.Context, rID *provider.ResourceId) (io.ReadCloser, error) +} + +func contextGet(ctx context.Context, k string) (string, bool) { + md, ok := metadata.FromOutgoingContext(ctx) + if !ok { + return "", false + } + + token, ok := md[k] + if len(token) == 0 || !ok { + return "", false + } + + return token[0], ok +} diff --git a/services/search/pkg/content/tika.go b/services/search/pkg/content/tika.go new file mode 100644 index 00000000000..a8ecd5ae82e --- /dev/null +++ b/services/search/pkg/content/tika.go @@ -0,0 +1,86 @@ +package content + +import ( + "context" + "fmt" + "strings" + + "github.com/bbalet/stopwords" + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/google/go-tika/tika" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + "github.com/owncloud/ocis/v2/services/search/pkg/config" +) + +// Tika is used to extract content from a resource, +// it uses apache tika to retrieve all the data. +type Tika struct { + *Basic + Retriever + tika *tika.Client +} + +// NewTikaExtractor creates a new Tika instance. 
+func NewTikaExtractor(gw gateway.GatewayAPIClient, logger log.Logger, cfg *config.Config) (*Tika, error) { + basic, err := NewBasicExtractor(logger) + if err != nil { + return nil, err + } + + tk := tika.NewClient(nil, cfg.Extractor.Tika.TikaURL) + tkv, err := tk.Version(context.Background()) + if err != nil { + return nil, err + } + logger.Info().Msgf("Tika version: %s", tkv) + + return &Tika{ + Basic: basic, + Retriever: newCS3Retriever(gw, logger, cfg.Extractor.CS3AllowInsecure), + tika: tika.NewClient(nil, cfg.Extractor.Tika.TikaURL), + }, nil +} + +// Extract loads a resource from its underlying storage, passes it to tika and processes the result into a Document. +func (t Tika) Extract(ctx context.Context, ri *provider.ResourceInfo) (Document, error) { + doc, err := t.Basic.Extract(ctx, ri) + if err != nil { + return doc, err + } + + if ri.Size == 0 { + return doc, nil + } + + if ri.Type != provider.ResourceType_RESOURCE_TYPE_FILE { + return doc, nil + } + + data, err := t.Retrieve(ctx, ri.Id) + if err != nil { + return doc, err + } + defer data.Close() + + metas, err := t.tika.MetaRecursive(ctx, data) + if err != nil { + return doc, err + } + + for _, meta := range metas { + if title, err := getFirstValue(meta, "title"); err == nil { + doc.Title = strings.TrimSpace(fmt.Sprintf("%s %s", doc.Title, title)) + } + + if content, err := getFirstValue(meta, "X-TIKA:content"); err == nil { + doc.Content = strings.TrimSpace(fmt.Sprintf("%s %s", doc.Content, content)) + } + } + + if lang, _ := t.tika.LanguageString(ctx, doc.Content); lang != "" { + doc.Content = stopwords.CleanString(doc.Content, lang, true) + } + + return doc, nil +} diff --git a/services/search/pkg/content/tika_test.go b/services/search/pkg/content/tika_test.go new file mode 100644 index 00000000000..fa261182417 --- /dev/null +++ b/services/search/pkg/content/tika_test.go @@ -0,0 +1,96 @@ +package content_test + +import ( + "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" + + 
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + conf "github.com/owncloud/ocis/v2/services/search/pkg/config/defaults" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + contentMocks "github.com/owncloud/ocis/v2/services/search/pkg/content/mocks" + "github.com/stretchr/testify/mock" +) + +var _ = Describe("Tika", func() { + Describe("extract", func() { + var ( + body string + language string + version string + srv *httptest.Server + tika *content.Tika + ) + + BeforeEach(func() { + body = "" + language = "" + version = "" + srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + out := "" + switch req.URL.Path { + case "/version": + out = version + case "/language/string": + out = language + case "/rmeta/text": + out = fmt.Sprintf(`[{"X-TIKA:content":"%s"}]`, body) + } + + _, _ = w.Write([]byte(out)) + })) + + cfg := conf.DefaultConfig() + cfg.Extractor.Tika.TikaURL = srv.URL + + var err error + tika, err = content.NewTikaExtractor(nil, log.NewLogger(), cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(tika).ToNot(BeNil()) + + retriever := &contentMocks.Retriever{} + retriever.On("Retrieve", mock.Anything, mock.Anything, mock.Anything).Return(io.NopCloser(strings.NewReader(body)), nil) + + tika.Retriever = retriever + }) + + AfterEach(func() { + srv.Close() + }) + + It("skips non file resources", func() { + doc, err := tika.Extract(context.TODO(), &provider.ResourceInfo{}) + Expect(err).ToNot(HaveOccurred()) + Expect(doc.Content).To(Equal("")) + }) + + It("adds content", func() { + body = "any body" + + doc, err := tika.Extract(context.TODO(), &provider.ResourceInfo{ + Type: provider.ResourceType_RESOURCE_TYPE_FILE, + Size: 1, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(doc.Content).To(Equal(body)) + }) + + It("removes stop words", func() { + body = "body to test stop words!!! 
I, you, he, she, it, we, you, they, stay" + language = "en" + + doc, err := tika.Extract(context.TODO(), &provider.ResourceInfo{ + Type: provider.ResourceType_RESOURCE_TYPE_FILE, + Size: 1, + }) + Expect(err).ToNot(HaveOccurred()) + Expect(doc.Content).To(Equal("body test stop words i stay ")) + }) + }) +}) diff --git a/services/search/pkg/engine/bleve.go b/services/search/pkg/engine/bleve.go new file mode 100644 index 00000000000..565eba1490a --- /dev/null +++ b/services/search/pkg/engine/bleve.go @@ -0,0 +1,368 @@ +package engine + +import ( + "context" + "errors" + "math" + "path" + "path/filepath" + "strings" + "time" + + "github.com/blevesearch/bleve/v2/analysis/token/porter" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" + "github.com/blevesearch/bleve/v2/mapping" + "github.com/blevesearch/bleve/v2/search/query" + storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/v2/pkg/storagespace" + "github.com/cs3org/reva/v2/pkg/utils" + searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + "google.golang.org/protobuf/types/known/timestamppb" +) + +// Bleve represents a search engine which utilizes bleve to search and store resources. +type Bleve struct { + index bleve.Index +} + +// NewBleveIndex returns a new bleve index +// given path must exist. 
+func NewBleveIndex(root string) (bleve.Index, error) {
+	destination := filepath.Join(root, "bleve")
+	index, err := bleve.Open(destination)
+	if errors.Is(err, bleve.ErrorIndexPathDoesNotExist) { // no index at destination yet: create one with the default mapping
+		m, err := BuildBleveMapping()
+		if err != nil {
+			return nil, err
+		}
+		index, err = bleve.New(destination, m)
+		if err != nil {
+			return nil, err
+		}
+
+		return index, nil
+	}
+
+	return index, err
+}
+
+// NewBleveEngine creates a new Bleve engine which searches and stores resources in the given index.
+func NewBleveEngine(index bleve.Index) *Bleve {
+	return &Bleve{
+		index: index,
+	}
+}
+
+// BuildBleveMapping builds the index mapping: Name is one lowercased token, Content is tokenized, lowercased and porter-filtered, all other fields use the keyword analyzer.
+func BuildBleveMapping() (mapping.IndexMapping, error) {
+	lowercaseMapping := bleve.NewTextFieldMapping()
+	lowercaseMapping.Analyzer = "lowercaseKeyword"
+
+	fulltextFieldMapping := bleve.NewTextFieldMapping()
+	fulltextFieldMapping.Analyzer = "fulltext"
+
+	docMapping := bleve.NewDocumentMapping()
+	docMapping.AddFieldMappingsAt("Name", lowercaseMapping)
+	//docMapping.AddFieldMappingsAt("Tags", lowercaseMapping)
+	docMapping.AddFieldMappingsAt("Content", fulltextFieldMapping)
+
+	indexMapping := bleve.NewIndexMapping()
+	indexMapping.DefaultAnalyzer = keyword.Name
+	indexMapping.DefaultMapping = docMapping
+	err := indexMapping.AddCustomAnalyzer("lowercaseKeyword",
+		map[string]interface{}{
+			"type":      custom.Name,
+			"tokenizer": single.Name,
+			"token_filters": []string{
+				lowercase.Name,
+			},
+		},
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	err = indexMapping.AddCustomAnalyzer("fulltext",
+		map[string]interface{}{
+			"type":      custom.Name,
+			"tokenizer": unicode.Name,
+			"token_filters": []string{
+				lowercase.Name,
+				porter.Name,
+			},
+		},
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	return indexMapping, nil
+}
+
+// Search executes a search request operation within the index.
+// Returns a SearchIndexResponse object or an error.
+func (b *Bleve) Search(_ context.Context, sir *searchService.SearchIndexRequest) (*searchService.SearchIndexResponse, error) {
+	q := bleve.NewConjunctionQuery(
+		// Skip documents that have been marked as deleted
+		&query.BoolFieldQuery{
+			Bool:     false,
+			FieldVal: "Deleted",
+		},
+		&query.QueryStringQuery{
+			Query: formatQuery(sir.Query),
+		},
+	)
+
+	if sir.Ref != nil { // scope the search to the referenced root resource and path prefix
+		q.Conjuncts = append(
+			q.Conjuncts,
+			&query.TermQuery{
+				FieldVal: "RootID",
+				Term: storagespace.FormatResourceID(
+					storageProvider.ResourceId{
+						StorageId: sir.Ref.GetResourceId().GetStorageId(),
+						SpaceId:   sir.Ref.GetResourceId().GetSpaceId(),
+						OpaqueId:  sir.Ref.GetResourceId().GetOpaqueId(),
+					},
+				),
+			},
+			&query.PrefixQuery{
+				Prefix:   utils.MakeRelativePath(path.Join(sir.Ref.Path, "/")),
+				FieldVal: "Path",
+			},
+		)
+	}
+
+	bleveReq := bleve.NewSearchRequest(q)
+
+	switch {
+	case sir.PageSize == -1: // -1 lifts the limit on returned hits
+		bleveReq.Size = math.MaxInt
+	case sir.PageSize == 0: // no page size given: fall back to 200 hits
+		bleveReq.Size = 200
+	default:
+		bleveReq.Size = int(sir.PageSize)
+	}
+
+	bleveReq.Fields = []string{"*"}
+	res, err := b.index.Search(bleveReq)
+	if err != nil {
+		return nil, err
+	}
+
+	matches := make([]*searchMessage.Match, 0, len(res.Hits))
+	for _, hit := range res.Hits {
+		rootID, err := storagespace.ParseID(getValue[string](hit.Fields, "RootID"))
+		if err != nil {
+			return nil, err
+		}
+
+		rID, err := storagespace.ParseID(getValue[string](hit.Fields, "ID"))
+		if err != nil {
+			return nil, err
+		}
+
+		pID, _ := storagespace.ParseID(getValue[string](hit.Fields, "ParentID")) // NOTE(review): error ignored, presumably because ParentID may be empty - confirm
+		match := &searchMessage.Match{
+			Score: float32(hit.Score),
+			Entity: &searchMessage.Entity{
+				Ref: &searchMessage.Reference{
+					ResourceId: resourceIDtoSearchID(rootID),
+					Path:       getValue[string](hit.Fields, "Path"),
+				},
+				Id:       resourceIDtoSearchID(rID),
+				Name:     getValue[string](hit.Fields, "Name"),
+				ParentId: resourceIDtoSearchID(pID),
+				Size:     uint64(getValue[float64](hit.Fields, "Size")),
+				Type:     uint64(getValue[float64](hit.Fields, "Type")),
+				MimeType: getValue[string](hit.Fields, "MimeType"),
+				Deleted:  getValue[bool](hit.Fields, "Deleted"),
+				//Tags: getSliceValue[string](hit.Fields, "Tags"),
+			},
+		}
+
+		if mtime, err := time.Parse(time.RFC3339, getValue[string](hit.Fields, "Mtime")); err == nil {
+			match.Entity.LastModifiedTime = &timestamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())}
+		}
+
+		matches = append(matches, match)
+	}
+
+	return &searchService.SearchIndexResponse{
+		Matches:      matches,
+		TotalMatches: int32(res.Total),
+	}, nil
+}
+
+// Upsert indexes or stores Resource data fields.
+func (b *Bleve) Upsert(id string, r Resource) error {
+	return b.index.Index(id, r)
+}
+
+// Move updates path, name and parent of the resource and, for containers, rewrites the path prefix of every indexed descendant.
+func (b *Bleve) Move(id string, parentid string, target string) error {
+	r, err := b.getResource(id)
+	if err != nil {
+		return err
+	}
+	currentPath := r.Path
+	nextPath := utils.MakeRelativePath(target)
+
+	r, err = b.updateEntity(id, func(r *Resource) {
+		r.Path = nextPath
+		r.Name = path.Base(nextPath)
+		r.ParentID = parentid
+	})
+	if err != nil {
+		return err
+	}
+
+	if r.Type == uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
+		q := bleve.NewConjunctionQuery(
+			bleve.NewQueryStringQuery("RootID:"+r.RootID),
+			bleve.NewQueryStringQuery("Path:"+escapeQuery(currentPath+"/*")),
+		)
+		bleveReq := bleve.NewSearchRequest(q)
+		bleveReq.Size = math.MaxInt
+		bleveReq.Fields = []string{"*"}
+		res, err := b.index.Search(bleveReq)
+		if err != nil {
+			return err
+		}
+
+		for _, h := range res.Hits {
+			_, err := b.updateEntity(h.ID, func(r *Resource) {
+				r.Path = strings.Replace(r.Path, currentPath, nextPath, 1) // swap the old container prefix for the new one
+			})
+			if err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// Delete marks the resource as deleted.
+// The resource object will stay in the bleve index,
+// instead of removing the resource it just marks it as deleted!
+// can be undone +func (b *Bleve) Delete(id string) error { + return b.setDeleted(id, true) +} + +// Restore is the counterpart to Delete. +// It restores the resource which makes it available again. +func (b *Bleve) Restore(id string) error { + return b.setDeleted(id, false) +} + +// Purge removes a resource from the index, irreversible operation. +func (b *Bleve) Purge(id string) error { + return b.index.Delete(id) +} + +// DocCount returns the number of resources in the index. +func (b *Bleve) DocCount() (uint64, error) { + return b.index.DocCount() +} + +func (b *Bleve) getResource(id string) (*Resource, error) { + req := bleve.NewSearchRequest(bleve.NewDocIDQuery([]string{id})) + req.Fields = []string{"*"} + res, err := b.index.Search(req) + if err != nil { + return nil, err + } + if res.Hits.Len() == 0 { + return nil, errors.New("entity not found") + } + + fields := res.Hits[0].Fields + + return &Resource{ + ID: getValue[string](fields, "ID"), + RootID: getValue[string](fields, "RootID"), + Path: getValue[string](fields, "Path"), + ParentID: getValue[string](fields, "ParentID"), + Type: uint64(getValue[float64](fields, "Type")), + Deleted: getValue[bool](fields, "Deleted"), + Document: content.Document{ + Name: getValue[string](fields, "Name"), + Title: getValue[string](fields, "Title"), + Size: uint64(getValue[float64](fields, "Size")), + Mtime: getValue[string](fields, "Mtime"), + MimeType: getValue[string](fields, "MimeType"), + Content: getValue[string](fields, "Content"), + //Tags: getSliceValue[string](fields, "Tags"), + }, + }, nil +} + +func (b *Bleve) updateEntity(id string, mutateFunc func(r *Resource)) (*Resource, error) { + it, err := b.getResource(id) + if err != nil { + return nil, err + } + + mutateFunc(it) + + return it, b.index.Index(it.ID, it) +} + +func (b *Bleve) setDeleted(id string, deleted bool) error { + it, err := b.updateEntity(id, func(r *Resource) { + r.Deleted = deleted + }) + if err != nil { + return err + } + + if it.Type == 
uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) { + q := bleve.NewConjunctionQuery( + bleve.NewQueryStringQuery("RootID:"+it.RootID), + bleve.NewQueryStringQuery("Path:"+escapeQuery(it.Path+"/*")), + ) + bleveReq := bleve.NewSearchRequest(q) + bleveReq.Size = math.MaxInt + bleveReq.Fields = []string{"*"} + res, err := b.index.Search(bleveReq) + if err != nil { + return err + } + + for _, h := range res.Hits { + _, err := b.updateEntity(h.ID, func(r *Resource) { + r.Deleted = deleted + }) + if err != nil { + return err + } + } + } + + return nil +} + +func formatQuery(q string) string { + cq := q + fields := []string{"RootID", "Path", "ID", "Name", "Size", "Mtime", "MimeType", "Type"} + for _, field := range fields { + cq = strings.ReplaceAll(cq, strings.ToLower(field)+":", field+":") + } + + if strings.Contains(cq, ":") { + return cq // Sophisticated field based search + } + + // this is a basic filename search + return "Name:*" + strings.ReplaceAll(strings.ToLower(cq), " ", `\ `) + "*" +} diff --git a/services/search/pkg/engine/bleve_test.go b/services/search/pkg/engine/bleve_test.go new file mode 100644 index 00000000000..4e54659fc1e --- /dev/null +++ b/services/search/pkg/engine/bleve_test.go @@ -0,0 +1,376 @@ +package engine_test + +import ( + "context" + "fmt" + + "github.com/cs3org/reva/v2/pkg/storagespace" + + "github.com/blevesearch/bleve/v2" + sprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + "github.com/owncloud/ocis/v2/services/search/pkg/engine" +) + +var _ = Describe("Bleve", func() { + var ( + eng *engine.Bleve + idx bleve.Index + ctx context.Context + + doSearch = func(id string, query string) (*searchsvc.SearchIndexResponse, error) { + rID, err := storagespace.ParseID(id) + if err != nil { + return nil, err + } + + return eng.Search(ctx, &searchsvc.SearchIndexRequest{ + Query: query, + Ref: &searchmsg.Reference{ + ResourceId: &searchmsg.ResourceID{ + StorageId: rID.StorageId, + SpaceId: rID.SpaceId, + OpaqueId: rID.OpaqueId, + }, + }, + }) + } + + assertDocCount = func(id string, query string, expectedCount int) []*searchmsg.Match { + res, err := doSearch(id, query) + + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, len(res.Matches)).To(Equal(expectedCount), "query returned unexpected number of results: "+query) + return res.Matches + } + + rootResource engine.Resource + parentResource engine.Resource + childResource engine.Resource + ) + + BeforeEach(func() { + mapping, err := engine.BuildBleveMapping() + Expect(err).ToNot(HaveOccurred()) + + idx, err = bleve.NewMemOnly(mapping) + Expect(err).ToNot(HaveOccurred()) + + eng = engine.NewBleveEngine(idx) + Expect(err).ToNot(HaveOccurred()) + + rootResource = engine.Resource{ + ID: "1$2!2", + RootID: "1$2!2", + Path: ".", + Document: content.Document{}, + } + + parentResource = engine.Resource{ + ID: "1$2!3", + ParentID: rootResource.ID, + RootID: rootResource.ID, + Path: "./parent d!r", + Type: uint64(sprovider.ResourceType_RESOURCE_TYPE_CONTAINER), + Document: content.Document{Name: "parent d!r"}, + } + + childResource = engine.Resource{ + ID: "1$2!4", + ParentID: parentResource.ID, + RootID: rootResource.ID, + Path: "./parent 
d!r/child.pdf", + Type: uint64(sprovider.ResourceType_RESOURCE_TYPE_FILE), + Document: content.Document{Name: "child.pdf"}, + } + }) + + Describe("New", func() { + It("returns a new index instance", func() { + b := engine.NewBleveEngine(idx) + Expect(b).ToNot(BeNil()) + }) + }) + + Describe("Search", func() { + Context("by other fields than filename", func() { + /*It("finds files by tags", func() { + parentResource.Document.Tags = []string{"foo", "bar"} + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Tags:foo", 1) + assertDocCount(rootResource.ID, "Tags:bar", 1) + assertDocCount(rootResource.ID, "Tags:foo Tags:bar", 1) + assertDocCount(rootResource.ID, "Tags:foo Tags:bar Tags:baz", 1) + assertDocCount(rootResource.ID, "Tags:foo Tags:bar Tags:baz", 1) + assertDocCount(rootResource.ID, "Tags:baz", 0) + })*/ + + It("finds files by size", func() { + parentResource.Document.Size = 12345 + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Size:12345", 1) + assertDocCount(rootResource.ID, "Size:>1000", 1) + assertDocCount(rootResource.ID, "Size:<100000", 1) + assertDocCount(rootResource.ID, "Size:12344", 0) + assertDocCount(rootResource.ID, "Size:<1000", 0) + assertDocCount(rootResource.ID, "Size:>100000", 0) + }) + }) + + Context("by filename", func() { + It("finds files with spaces in the filename", func() { + parentResource.Document.Name = "Foo oo.pdf" + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, `Name:foo\ o*`, 1) + }) + + It("finds files by digits in the filename", func() { + parentResource.Document.Name = "12345.pdf" + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:1234*", 1) + }) + + It("filters hidden files", func() { + childResource.Hidden = true 
+ err := eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Hidden:T", 1) + assertDocCount(rootResource.ID, "Hidden:F", 0) + }) + + Context("with a file in the root of the space", func() { + It("scopes the search to the specified space", func() { + parentResource.Document.Name = "foo.pdf" + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:foo.pdf", 1) + assertDocCount("9$8!7", "Name:foo.pdf", 0) + }) + }) + + It("limits the search to the specified fields", func() { + parentResource.Document.Name = "bar.pdf" + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:bar.pdf", 1) + assertDocCount(rootResource.ID, "Unknown:field", 0) + }) + + It("returns the total number of hits", func() { + parentResource.Document.Name = "bar.pdf" + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + res, err := doSearch(rootResource.ID, "Name:bar*") + Expect(err).ToNot(HaveOccurred()) + Expect(res.TotalMatches).To(Equal(int32(1))) + }) + + It("returns all desired fields", func() { + parentResource.Document.Name = "bar.pdf" + parentResource.Type = 3 + parentResource.MimeType = "application/pdf" + + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + matches := assertDocCount(rootResource.ID, fmt.Sprintf("Name:%s", parentResource.Name), 1) + match := matches[0] + Expect(match.Entity.Ref.Path).To(Equal(parentResource.Path)) + Expect(match.Entity.Name).To(Equal(parentResource.Name)) + Expect(match.Entity.Size).To(Equal(parentResource.Size)) + Expect(match.Entity.Type).To(Equal(parentResource.Type)) + Expect(match.Entity.MimeType).To(Equal(parentResource.MimeType)) + Expect(match.Entity.Deleted).To(BeFalse()) + Expect(match.Score > 0).To(BeTrue()) + }) + + It("finds files by name, prefix 
or substring match", func() { + parentResource.Document.Name = "foo.pdf" + + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + queries := []string{"foo.pdf", "foo*", "*oo.p*"} + for _, query := range queries { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, query, 1) + } + }) + + It("uses a lower-case index", func() { + parentResource.Document.Name = "foo.pdf" + + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:foo*", 1) + assertDocCount(rootResource.ID, "Name:Foo*", 0) + }) + + Context("and an additional file in a subdirectory", func() { + BeforeEach(func() { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + }) + + It("finds files living deeper in the tree by filename, prefix or substring match", func() { + queries := []string{"child.pdf", "child*", "*ld.*"} + for _, query := range queries { + assertDocCount(rootResource.ID, query, 1) + } + }) + }) + }) + }) + + Describe("Upsert", func() { + It("adds a resourceInfo to the index", func() { + err := eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + count, err := idx.DocCount() + Expect(err).ToNot(HaveOccurred()) + Expect(count).To(Equal(uint64(1))) + + query := bleve.NewMatchQuery("child.pdf") + res, err := idx.Search(bleve.NewSearchRequest(query)) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Hits.Len()).To(Equal(1)) + }) + + It("updates an existing resource in the index", func() { + + err := eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + countA, err := idx.DocCount() + Expect(err).ToNot(HaveOccurred()) + Expect(countA).To(Equal(uint64(1))) + + err = eng.Upsert(childResource.ID, childResource) + 
Expect(err).ToNot(HaveOccurred()) + + countB, err := idx.DocCount() + Expect(err).ToNot(HaveOccurred()) + Expect(countB).To(Equal(uint64(1))) + }) + }) + + Describe("Delete", func() { + It("marks a resource as deleted", func() { + err := eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:*child*", 1) + + err = eng.Delete(childResource.ID) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, "Name:*child*", 0) + }) + + It("marks a child resources as deleted", func() { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, parentResource.Document.Name, 1) + assertDocCount(rootResource.ID, childResource.Document.Name, 1) + + err = eng.Delete(parentResource.ID) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, parentResource.Document.Name, 0) + assertDocCount(rootResource.ID, childResource.Document.Name, 0) + }) + }) + + Describe("Restore", func() { + It("also marks child resources as restored", func() { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Delete(parentResource.ID) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, parentResource.Name, 0) + assertDocCount(rootResource.ID, childResource.Name, 0) + + err = eng.Restore(parentResource.ID) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, parentResource.Name, 1) + assertDocCount(rootResource.ID, childResource.Name, 1) + }) + }) + + Describe("Move", func() { + It("renames the parent and its child resources", func() { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Upsert(childResource.ID, 
childResource) + Expect(err).ToNot(HaveOccurred()) + + parentResource.Path = "newname" + err = eng.Move(parentResource.ID, parentResource.ParentID, "./my/newname") + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(rootResource.ID, parentResource.Name, 0) + + matches := assertDocCount(rootResource.ID, "Name:child.pdf", 1) + Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("3")) + Expect(matches[0].Entity.Ref.Path).To(Equal("./my/newname/child.pdf")) + }) + + It("moves the parent and its child resources", func() { + err := eng.Upsert(parentResource.ID, parentResource) + Expect(err).ToNot(HaveOccurred()) + + err = eng.Upsert(childResource.ID, childResource) + Expect(err).ToNot(HaveOccurred()) + + parentResource.Path = " " + parentResource.ParentID = "1$2!somewhereopaqueid" + + err = eng.Move(parentResource.ID, parentResource.ParentID, "./somewhere/else/newname") + Expect(err).ToNot(HaveOccurred()) + assertDocCount(rootResource.ID, `parent d!r`, 0) + + matches := assertDocCount(rootResource.ID, "Name:child.pdf", 1) + Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("3")) + Expect(matches[0].Entity.Ref.Path).To(Equal("./somewhere/else/newname/child.pdf")) + + matches = assertDocCount(rootResource.ID, `newname`, 1) + Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("somewhereopaqueid")) + Expect(matches[0].Entity.Ref.Path).To(Equal("./somewhere/else/newname")) + + }) + }) +}) diff --git a/services/search/pkg/engine/engine.go b/services/search/pkg/engine/engine.go new file mode 100644 index 00000000000..74c465c9dd6 --- /dev/null +++ b/services/search/pkg/engine/engine.go @@ -0,0 +1,88 @@ +package engine + +import ( + "context" + "regexp" + + storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/content" +) + +var 
queryEscape = regexp.MustCompile(`([` + regexp.QuoteMeta(`+=&|> "foo" +// // bleve: []string{"foo", "bar"} -> []string{"foo", "bar"} +// switch v := iv.(type) { +// case T: +// add(v) +// case []interface{}: +// for _, rv := range v { +// add(rv) +// } +// } +// +// return +// } diff --git a/services/search/pkg/search/index/index_suite_test.go b/services/search/pkg/engine/engine_suite_test.go similarity index 58% rename from services/search/pkg/search/index/index_suite_test.go rename to services/search/pkg/engine/engine_suite_test.go index 09099db1a4c..062fcadb182 100644 --- a/services/search/pkg/search/index/index_suite_test.go +++ b/services/search/pkg/engine/engine_suite_test.go @@ -1,4 +1,4 @@ -package index_test +package engine_test import ( "testing" @@ -7,7 +7,7 @@ import ( . "github.com/onsi/gomega" ) -func TestIndex(t *testing.T) { +func TestEngine(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Index Suite") + RunSpecs(t, "Engine Suite") } diff --git a/services/search/pkg/engine/mocks/engine.go b/services/search/pkg/engine/mocks/engine.go new file mode 100644 index 00000000000..273fd2e93be --- /dev/null +++ b/services/search/pkg/engine/mocks/engine.go @@ -0,0 +1,146 @@ +// Code generated by mockery v2.14.1. DO NOT EDIT. 
+ +package mocks + +import ( + context "context" + + engine "github.com/owncloud/ocis/v2/services/search/pkg/engine" + mock "github.com/stretchr/testify/mock" + + v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" +) + +// Engine is an autogenerated mock type for the Engine type +type Engine struct { + mock.Mock +} + +// Delete provides a mock function with given fields: id +func (_m *Engine) Delete(id string) error { + ret := _m.Called(id) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(id) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// DocCount provides a mock function with given fields: +func (_m *Engine) DocCount() (uint64, error) { + ret := _m.Called() + + var r0 uint64 + if rf, ok := ret.Get(0).(func() uint64); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(uint64) + } + + var r1 error + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// Move provides a mock function with given fields: id, parentid, target +func (_m *Engine) Move(id string, parentid string, target string) error { + ret := _m.Called(id, parentid, target) + + var r0 error + if rf, ok := ret.Get(0).(func(string, string, string) error); ok { + r0 = rf(id, parentid, target) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Purge provides a mock function with given fields: id +func (_m *Engine) Purge(id string) error { + ret := _m.Called(id) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(id) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Restore provides a mock function with given fields: id +func (_m *Engine) Restore(id string) error { + ret := _m.Called(id) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(id) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Search provides a mock function with given fields: ctx, req +func (_m *Engine) Search(ctx context.Context, req 
*v0.SearchIndexRequest) (*v0.SearchIndexResponse, error) { + ret := _m.Called(ctx, req) + + var r0 *v0.SearchIndexResponse + if rf, ok := ret.Get(0).(func(context.Context, *v0.SearchIndexRequest) *v0.SearchIndexResponse); ok { + r0 = rf(ctx, req) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v0.SearchIndexResponse) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, *v0.SearchIndexRequest) error); ok { + r1 = rf(ctx, req) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// Upsert provides a mock function with given fields: id, r +func (_m *Engine) Upsert(id string, r engine.Resource) error { + ret := _m.Called(id, r) + + var r0 error + if rf, ok := ret.Get(0).(func(string, engine.Resource) error); ok { + r0 = rf(id, r) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +type mockConstructorTestingTNewEngine interface { + mock.TestingT + Cleanup(func()) +} + +// NewEngine creates a new instance of Engine. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewEngine(t mockConstructorTestingTNewEngine) *Engine { + mock := &Engine{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/services/search/pkg/logging/logging.go b/services/search/pkg/logging/logging.go index 28f1aebef13..50cac74c296 100644 --- a/services/search/pkg/logging/logging.go +++ b/services/search/pkg/logging/logging.go @@ -5,7 +5,7 @@ import ( "github.com/owncloud/ocis/v2/services/search/pkg/config" ) -// LoggerFromConfig initializes a service-specific logger instance. +// Configure initializes a service-specific logger instance. 
func Configure(name string, cfg *config.Log) log.Logger { return log.NewLogger( log.Name(name), diff --git a/services/search/pkg/search/debouncer.go b/services/search/pkg/search/debouncer.go new file mode 100644 index 00000000000..bb1336f3da2 --- /dev/null +++ b/services/search/pkg/search/debouncer.go @@ -0,0 +1,53 @@ +package search + +import ( + "sync" + "time" + + user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" +) + +// SpaceDebouncer debounces operations on spaces for a configurable amount of time +type SpaceDebouncer struct { + after time.Duration + f func(id *provider.StorageSpaceId, userID *user.UserId) + pending map[string]*time.Timer + inProgress sync.Map + + mutex sync.Mutex +} + +// NewSpaceDebouncer returns a new SpaceDebouncer instance +func NewSpaceDebouncer(d time.Duration, f func(id *provider.StorageSpaceId, userID *user.UserId)) *SpaceDebouncer { + return &SpaceDebouncer{ + after: d, + f: f, + pending: map[string]*time.Timer{}, + inProgress: sync.Map{}, + } +} + +// Debounce restarts the debounce timer for the given space +func (d *SpaceDebouncer) Debounce(id *provider.StorageSpaceId, userID *user.UserId) { + d.mutex.Lock() + defer d.mutex.Unlock() + + if t := d.pending[id.OpaqueId]; t != nil { + t.Stop() + } + + d.pending[id.OpaqueId] = time.AfterFunc(d.after, func() { + if _, ok := d.inProgress.Load(id.OpaqueId); ok { + // Reschedule this run for when the previous run has finished + d.mutex.Lock() + d.pending[id.OpaqueId].Reset(d.after) + d.mutex.Unlock() + return + } + + d.inProgress.Store(id.OpaqueId, true) + defer d.inProgress.Delete(id.OpaqueId) + d.f(id, userID) + }) +} diff --git a/services/search/pkg/search/debouncer_test.go b/services/search/pkg/search/debouncer_test.go new file mode 100644 index 00000000000..f4f9693275e --- /dev/null +++ b/services/search/pkg/search/debouncer_test.go @@ -0,0 +1,72 @@ +package search_test + +import ( + "time" + + user
"github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + sprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/owncloud/ocis/v2/services/search/pkg/search" +) + +var _ = Describe("SpaceDebouncer", func() { + var ( + debouncer *search.SpaceDebouncer + callCount map[string]int + + userId = &user.UserId{ + OpaqueId: "user", + } + spaceid = &sprovider.StorageSpaceId{ + OpaqueId: "spaceid", + } + ) + + BeforeEach(func() { + callCount = map[string]int{} + debouncer = search.NewSpaceDebouncer(50*time.Millisecond, func(id *sprovider.StorageSpaceId, _ *user.UserId) { + callCount[id.OpaqueId] += 1 + }) + }) + + It("debounces", func() { + debouncer.Debounce(spaceid, userId) + debouncer.Debounce(spaceid, userId) + debouncer.Debounce(spaceid, userId) + Eventually(func() int { + return callCount["spaceid"] + }, "200ms").Should(Equal(1)) + }) + + It("works multiple times", func() { + debouncer.Debounce(spaceid, userId) + debouncer.Debounce(spaceid, userId) + debouncer.Debounce(spaceid, userId) + time.Sleep(100 * time.Millisecond) + + debouncer.Debounce(spaceid, userId) + debouncer.Debounce(spaceid, userId) + + Eventually(func() int { + return callCount["spaceid"] + }, "200ms").Should(Equal(2)) + }) + + It("doesn't trigger twice simultaneously", func() { + debouncer = search.NewSpaceDebouncer(50*time.Millisecond, func(id *sprovider.StorageSpaceId, _ *user.UserId) { + callCount[id.OpaqueId] += 1 + time.Sleep(300 * time.Millisecond) + }) + debouncer.Debounce(spaceid, userId) + time.Sleep(100 * time.Millisecond) // Let it trigger once + + debouncer.Debounce(spaceid, userId) + time.Sleep(100 * time.Millisecond) // shouldn't trigger as the other run is still in progress + Expect(callCount["spaceid"]).To(Equal(1)) + + Eventually(func() int { + return callCount["spaceid"] + }, "500ms").Should(Equal(2)) + }) +}) diff --git a/services/search/pkg/search/events.go 
b/services/search/pkg/search/events.go
new file mode 100644
index 00000000000..1fc5e12c211
--- /dev/null
+++ b/services/search/pkg/search/events.go
@@ -0,0 +1,104 @@
+package search
+
+import (
+	"time"
+
+	user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1"
+	provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
+	"github.com/cs3org/reva/v2/pkg/events"
+	"github.com/cs3org/reva/v2/pkg/storagespace"
+	"github.com/owncloud/ocis/v2/ocis-pkg/log"
+	"github.com/owncloud/ocis/v2/services/search/pkg/config"
+)
+
+// HandleEvents subscribes to the events needed for indexing and handles the whole resource
+// indexing lifecycle in cfg.Events.NumConsumers background goroutines (1 when the value is 0).
+func HandleEvents(s Searcher, bus events.Consumer, logger log.Logger, cfg *config.Config) error {
+	evts := []events.Unmarshaller{
+		events.ItemTrashed{},
+		events.ItemRestored{},
+		events.ItemMoved{},
+		events.ContainerCreated{},
+		events.FileTouched{},
+		events.FileVersionRestored{},
+		//events.TagsAdded{},
+		//events.TagsRemoved{},
+	}
+
+	if cfg.Events.AsyncUploads {
+		// evts = append(evts, events.UploadReady{})
+	} else {
+		evts = append(evts, events.FileUploaded{})
+	}
+
+	ch, err := events.Consume(bus, "search", evts...)
+	if err != nil {
+		return err
+	}
+
+	if cfg.Events.NumConsumers == 0 {
+		cfg.Events.NumConsumers = 1
+	}
+
+	spaceID := func(ref *provider.Reference) *provider.StorageSpaceId { // builds the space id from the storage and space id of ref
+		return &provider.StorageSpaceId{
+			OpaqueId: storagespace.FormatResourceID(
+				provider.ResourceId{
+					StorageId: ref.GetResourceId().GetStorageId(),
+					SpaceId:   ref.GetResourceId().GetSpaceId(),
+				},
+			),
+		}
+	}
+
+	indexSpaceDebouncer := NewSpaceDebouncer(time.Duration(cfg.Events.DebounceDuration)*time.Millisecond, func(id *provider.StorageSpaceId, userID *user.UserId) {
+		if err := s.IndexSpace(id, userID); err != nil {
+			logger.Error().Err(err).Interface("spaceID", id).Interface("userID", userID).Msg("error while indexing a space")
+		}
+	})
+
+	for i := 0; i < cfg.Events.NumConsumers; i++ {
+		go func(s Searcher, ch <-chan interface{}) {
+			for e := range ch {
+				logger.Debug().Interface("event", e).Msg("updating index")
+
+				var err error // NOTE(review): never assigned in the switch below, so the error check after it is currently dead code
+
+				switch ev := e.(type) {
+				case events.ItemTrashed:
+					s.TrashItem(ev.ID)
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.ItemMoved:
+					s.MoveItem(ev.Ref, ev.Executant)
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.ItemRestored:
+					s.RestoreItem(ev.Ref, ev.Executant)
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.ContainerCreated:
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.FileTouched:
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.FileVersionRestored:
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				//case events.TagsAdded:
+				// indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				//case events.TagsRemoved:
+				//indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				case events.FileUploaded:
+					indexSpaceDebouncer.Debounce(spaceID(ev.Ref), ev.Executant)
+				//case events.UploadReady:
+				//indexSpaceDebouncer.Debounce(spaceID(ev.FileRef), ev.ExecutingUser.Id)
+				}
+
+				if err != nil {
+					logger.Error().Err(err).Interface("event", e).Msg("error while handling event")
+				}
+			}
+		}(
+			s,
+			ch,
+		)
+	}
+
+	return nil
+}
diff --git a/services/search/pkg/search/events_test.go b/services/search/pkg/search/events_test.go
new file mode 100644
index 00000000000..b7966dbbfd7
--- /dev/null
+++ b/services/search/pkg/search/events_test.go
@@ -0,0 +1,53 @@
+package search_test
+
+import (
+	"github.com/cs3org/reva/v2/pkg/events"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/owncloud/ocis/v2/ocis-pkg/log"
+	"github.com/owncloud/ocis/v2/services/search/pkg/config"
+	"github.com/owncloud/ocis/v2/services/search/pkg/search"
+	searchMocks "github.com/owncloud/ocis/v2/services/search/pkg/search/mocks"
+	"github.com/stretchr/testify/mock"
+	mEvents "go-micro.dev/v4/events"
+)
+
+var _ = DescribeTable("events",
+	func(mcks []string, e interface{}, asyncUploads bool) {
+		var (
+			s     = &searchMocks.Searcher{}
+			calls int
+		)
+
+		bus, _ := mEvents.NewStream()
+
+		search.HandleEvents(s, bus, log.NewLogger(), &config.Config{
+			Events: config.Events{
+				AsyncUploads: asyncUploads,
+			},
+		})
+
+		for _, mck := range mcks {
+			s.On(mck, mock.Anything, mock.Anything).Return(nil).Run(func(args mock.Arguments) {
+				calls += 1
+			})
+		}
+
+		err := events.Publish(bus, e)
+
+		Expect(err).To(BeNil())
+		Eventually(func() int {
+			return calls
+		}, "2s").Should(Equal(len(mcks)))
+	},
+	Entry("ItemTrashed", []string{"TrashItem", "IndexSpace"}, events.ItemTrashed{}, false),
+	Entry("ItemMoved", []string{"MoveItem", "IndexSpace"}, events.ItemMoved{}, false),
+	Entry("ItemRestored", []string{"RestoreItem", "IndexSpace"}, events.ItemRestored{}, false),
+	Entry("ContainerCreated", []string{"IndexSpace"}, events.ContainerCreated{}, false),
+	Entry("FileTouched", []string{"IndexSpace"}, events.FileTouched{}, false),
+	Entry("FileVersionRestored", []string{"IndexSpace"}, events.FileVersionRestored{}, false),
+	//Entry("TagsAdded", []string{"IndexSpace"}, events.TagsAdded{}, false),
+	//Entry("TagsRemoved", []string{"IndexSpace"}, events.TagsRemoved{}, false),
+	Entry("FileUploaded", []string{"IndexSpace"}, events.FileUploaded{}, false),
+	//Entry("UploadReady", []string{"IndexSpace"}, events.UploadReady{ExecutingUser: &userv1beta1.User{}}, true),
+)
diff --git a/services/search/pkg/search/index/index.go b/services/search/pkg/search/index/index.go
deleted file mode 100644
index 92ff8ec91f9..00000000000
--- a/services/search/pkg/search/index/index.go
+++ /dev/null
@@ -1,391 +0,0 @@
-// Copyright 2018-2022 CERN
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// In applying this license, CERN does not waive the privileges and immunities
-// granted to it by virtue of its status as an Intergovernmental Organization
-// or submit itself to any jurisdiction.
- -package index - -import ( - "context" - "errors" - "math" - "path" - "regexp" - "strings" - "time" - - bleve "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" - "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" - "github.com/blevesearch/bleve/v2/analysis/token/lowercase" - "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" - "github.com/blevesearch/bleve/v2/mapping" - "github.com/blevesearch/bleve/v2/search" - "github.com/cs3org/reva/v2/pkg/storagespace" - searchTracing "github.com/owncloud/ocis/v2/services/search/pkg/tracing" - "go.opentelemetry.io/otel/attribute" - "google.golang.org/protobuf/types/known/timestamppb" - - sprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - "github.com/cs3org/reva/v2/pkg/utils" - searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" - searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" -) - -type indexDocument struct { - RootID string - Path string - ID string - ParentID string - - Name string - Size uint64 - Mtime string - MimeType string - Type uint64 - - Deleted bool - Hidden bool -} - -// Index represents a bleve based search index -type Index struct { - bleveIndex bleve.Index -} - -// NewPersisted returns a new instance of Index with the data being persisted in the given directory -func NewPersisted(path string) (*Index, error) { - mapping, err := BuildMapping() - if err != nil { - return nil, err - } - bi, err := bleve.New(path, mapping) - if err != nil { - return nil, err - } - return New(bi) -} - -// New returns a new instance of Index using the given bleve Index as the backend -func New(bleveIndex bleve.Index) (*Index, error) { - return &Index{ - bleveIndex: bleveIndex, - }, nil -} - -// DocCount returns the number of elemenst in the index -func (i *Index) DocCount() (uint64, error) { - return i.bleveIndex.DocCount() -} - -// Add adds a new entity to the Index -func (i *Index) Add(ref 
*sprovider.Reference, ri *sprovider.ResourceInfo) error { - entity := toEntity(ref, ri) - return i.bleveIndex.Index(idToBleveId(ri.Id), entity) -} - -// Delete marks an entity from the index as deleten (still keeping it around) -func (i *Index) Delete(id *sprovider.ResourceId) error { - return i.markAsDeleted(idToBleveId(id), true) -} - -// Restore marks an entity from the index as not being deleted -func (i *Index) Restore(id *sprovider.ResourceId) error { - return i.markAsDeleted(idToBleveId(id), false) -} - -func (i *Index) markAsDeleted(id string, deleted bool) error { - doc, err := i.updateEntity(id, func(doc *indexDocument) { - doc.Deleted = deleted - }) - if err != nil { - return err - } - - if doc.Type == uint64(sprovider.ResourceType_RESOURCE_TYPE_CONTAINER) { - query := bleve.NewConjunctionQuery( - bleve.NewQueryStringQuery("RootID:"+doc.RootID), - bleve.NewQueryStringQuery("Path:"+queryEscape(doc.Path+"/*")), - ) - bleveReq := bleve.NewSearchRequest(query) - bleveReq.Size = math.MaxInt - bleveReq.Fields = []string{"*"} - res, err := i.bleveIndex.Search(bleveReq) - if err != nil { - return err - } - - for _, h := range res.Hits { - _, err := i.updateEntity(h.ID, func(doc *indexDocument) { - doc.Deleted = deleted - }) - if err != nil { - return err - } - } - } - - return nil -} - -func (i *Index) updateEntity(id string, mutateFunc func(doc *indexDocument)) (*indexDocument, error) { - doc, err := i.getEntity(id) - if err != nil { - return nil, err - } - mutateFunc(doc) - err = i.bleveIndex.Index(doc.ID, doc) - if err != nil { - return nil, err - } - - return doc, nil -} - -func (i *Index) getEntity(id string) (*indexDocument, error) { - req := bleve.NewSearchRequest(bleve.NewDocIDQuery([]string{id})) - req.Fields = []string{"*"} - res, err := i.bleveIndex.Search(req) - if err != nil { - return nil, err - } - if res.Hits.Len() == 0 { - return nil, errors.New("entity not found") - } - return fieldsToEntity(res.Hits[0].Fields), nil -} - -// Purge removes an 
entity from the index -func (i *Index) Purge(id *sprovider.ResourceId) error { - return i.bleveIndex.Delete(idToBleveId(id)) -} - -// Move update the path of an entry and all its children -func (i *Index) Move(id, newParentID *sprovider.ResourceId, fullPath string) error { - bleveId := idToBleveId(id) - doc, err := i.getEntity(bleveId) - if err != nil { - return err - } - oldName := doc.Path - newName := utils.MakeRelativePath(fullPath) - - doc, err = i.updateEntity(bleveId, func(doc *indexDocument) { - doc.Path = newName - doc.Name = path.Base(newName) - doc.ParentID = idToBleveId(newParentID) - }) - if err != nil { - return err - } - - if doc.Type == uint64(sprovider.ResourceType_RESOURCE_TYPE_CONTAINER) { - query := bleve.NewConjunctionQuery( - bleve.NewQueryStringQuery("RootID:"+doc.RootID), - bleve.NewQueryStringQuery("Path:"+queryEscape(oldName+"/*")), - ) - bleveReq := bleve.NewSearchRequest(query) - bleveReq.Size = math.MaxInt - bleveReq.Fields = []string{"*"} - res, err := i.bleveIndex.Search(bleveReq) - if err != nil { - return err - } - - for _, h := range res.Hits { - _, err := i.updateEntity(h.ID, func(doc *indexDocument) { - doc.Path = strings.Replace(doc.Path, oldName, newName, 1) - }) - if err != nil { - return err - } - } - } - - return nil -} - -// Search searches the index according to the criteria specified in the given SearchIndexRequest -func (i *Index) Search(ctx context.Context, req *searchsvc.SearchIndexRequest) (*searchsvc.SearchIndexResponse, error) { - _, span := searchTracing.TraceProvider.Tracer("search").Start(ctx, "search index") - defer span.End() - deletedQuery := bleve.NewBoolFieldQuery(false) - deletedQuery.SetField("Deleted") - query := bleve.NewConjunctionQuery( - bleve.NewQueryStringQuery(req.Query), - deletedQuery, // Skip documents that have been marked as deleted - ) - span.SetAttributes(attribute.String("query", req.GetQuery())) - span.SetAttributes(attribute.String("reference", req.GetRef().String())) - if req.Ref != nil 
{ - query = bleve.NewConjunctionQuery( - query, - bleve.NewQueryStringQuery("RootID:"+idToBleveId(&sprovider.ResourceId{ - StorageId: req.Ref.GetResourceId().GetStorageId(), - SpaceId: req.Ref.GetResourceId().GetSpaceId(), - OpaqueId: req.Ref.GetResourceId().GetOpaqueId(), - })), // Limit search to the space - bleve.NewQueryStringQuery("Path:"+queryEscape(utils.MakeRelativePath(path.Join(req.Ref.Path, "/"))+"*")), // Limit search to this directory in the space - ) - } - bleveReq := bleve.NewSearchRequest(query) - bleveReq.Size = 200 - if req.PageSize > 0 { - bleveReq.Size = int(req.PageSize) - } - bleveReq.Fields = []string{"*"} - res, err := i.bleveIndex.Search(bleveReq) - if err != nil { - return nil, err - } - - matches := []*searchmsg.Match{} - for _, h := range res.Hits { - match, err := fromDocumentMatch(h) - if err != nil { - return nil, err - } - matches = append(matches, match) - } - - return &searchsvc.SearchIndexResponse{ - Matches: matches, - TotalMatches: int32(res.Total), - }, nil -} - -// BuildMapping builds a bleve index mapping which can be used for indexing -func BuildMapping() (mapping.IndexMapping, error) { - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "lowercaseKeyword" - - docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("Name", nameMapping) - - indexMapping := bleve.NewIndexMapping() - indexMapping.DefaultAnalyzer = keyword.Name - indexMapping.DefaultMapping = docMapping - err := indexMapping.AddCustomAnalyzer("lowercaseKeyword", - map[string]interface{}{ - "type": custom.Name, - "tokenizer": single.Name, - "token_filters": []string{ - lowercase.Name, - }, - }) - if err != nil { - return nil, err - } - - return indexMapping, nil -} - -func toEntity(ref *sprovider.Reference, ri *sprovider.ResourceInfo) *indexDocument { - doc := &indexDocument{ - RootID: idToBleveId(ref.ResourceId), - Path: ref.Path, - ID: idToBleveId(ri.Id), - ParentID: idToBleveId(ri.ParentId), - Name: ri.Name, - Size: ri.Size, 
- MimeType: ri.MimeType, - Type: uint64(ri.Type), - Deleted: false, - Hidden: strings.HasPrefix(ri.Path, "."), - } - - if ri.Mtime != nil { - doc.Mtime = time.Unix(int64(ri.Mtime.Seconds), int64(ri.Mtime.Nanos)).UTC().Format(time.RFC3339Nano) - } - - return doc -} - -func fieldsToEntity(fields map[string]interface{}) *indexDocument { - doc := &indexDocument{ - RootID: fields["RootID"].(string), - Path: fields["Path"].(string), - ID: fields["ID"].(string), - ParentID: fields["ParentID"].(string), - Name: fields["Name"].(string), - Size: uint64(fields["Size"].(float64)), - Mtime: fields["Mtime"].(string), - MimeType: fields["MimeType"].(string), - Type: uint64(fields["Type"].(float64)), - Deleted: fields["Deleted"].(bool), - Hidden: fields["Hidden"].(bool), - } - return doc -} - -func fromDocumentMatch(hit *search.DocumentMatch) (*searchmsg.Match, error) { - rootID, err := storagespace.ParseID(hit.Fields["RootID"].(string)) - if err != nil { - return nil, err - } - rID, err := storagespace.ParseID(hit.Fields["ID"].(string)) - if err != nil { - return nil, err - } - - match := &searchmsg.Match{ - Score: float32(hit.Score), - Entity: &searchmsg.Entity{ - Ref: &searchmsg.Reference{ - ResourceId: resourceIDtoSearchID(rootID), - Path: hit.Fields["Path"].(string), - }, - Id: resourceIDtoSearchID(rID), - Name: hit.Fields["Name"].(string), - Size: uint64(hit.Fields["Size"].(float64)), - Type: uint64(hit.Fields["Type"].(float64)), - MimeType: hit.Fields["MimeType"].(string), - Deleted: hit.Fields["Deleted"].(bool), - }, - } - if hit.Fields["ParentID"] != nil && hit.Fields["ParentID"] != "" { - parentID, err := storagespace.ParseID(hit.Fields["ParentID"].(string)) - if err != nil { - return nil, err - } - match.Entity.ParentId = resourceIDtoSearchID(parentID) - } - - if mtime, err := time.Parse(time.RFC3339Nano, hit.Fields["Mtime"].(string)); err == nil { - match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())} - } - - return 
match, nil -} - -func idToBleveId(id *sprovider.ResourceId) string { - if id == nil { - return "" - } - return storagespace.FormatResourceID(*id) -} - -func resourceIDtoSearchID(id sprovider.ResourceId) *searchmsg.ResourceID { - return &searchmsg.ResourceID{ - StorageId: id.GetStorageId(), - SpaceId: id.GetSpaceId(), - OpaqueId: id.GetOpaqueId()} -} - -func queryEscape(s string) string { - re := regexp.MustCompile(`([` + regexp.QuoteMeta(`+=&|>1000`, 1) - assertDocCount(ref.ResourceId, `Size:<100000`, 1) - - assertDocCount(ref.ResourceId, `Size:12344`, 0) - assertDocCount(ref.ResourceId, `Size:<1000`, 0) - assertDocCount(ref.ResourceId, `Size:>100000`, 0) - }) - }) - - Context("by filename", func() { - It("finds files with spaces in the filename", func() { - ri.Name = "Foo oo.pdf" - ref.Path = "./" + ri.Path - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(ref.ResourceId, `Name:foo\ o*`, 1) - }) - - It("finds files by digits in the filename", func() { - ri.Name = "12345.pdf" - ref.Path = "./" + ri.Path - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(ref.ResourceId, `Name:1234*`, 1) - }) - - It("filters hidden files", func() { - ri.Path = ".hidden.pdf" - ri.Name = ".hidden.pdf" - ref.Path = "./" + ri.Path - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(ref.ResourceId, `Name:*hidden* +Hidden:T`, 1) - assertDocCount(ref.ResourceId, `Name:*hidden* +Hidden:F`, 0) - }) - - Context("with a file in the root of the space", func() { - JustBeforeEach(func() { - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - }) - - It("scopes the search to the specified space", func() { - resourceId := &sprovider.ResourceId{ - StorageId: "provider-1", - SpaceId: "differentspaceid", - OpaqueId: "differentopaqueid", - } - assertDocCount(resourceId, `Name:foo.pdf`, 0) - }) - - It("limits the search to the specified fields", func() { - assertDocCount(ref.ResourceId, 
"Name:*"+ref.ResourceId.OpaqueId+"*", 0) - }) - - It("returns the total number of hits", func() { - res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{ - Query: "Name:foo.pdf", - Ref: &searchmsg.Reference{ - ResourceId: &searchmsg.ResourceID{ - StorageId: ref.ResourceId.StorageId, - SpaceId: ref.ResourceId.SpaceId, - OpaqueId: ref.ResourceId.OpaqueId, - }, - }, - }) - Expect(err).ToNot(HaveOccurred()) - Expect(res.TotalMatches).To(Equal(int32(1))) - }) - It("returns all desired fields", func() { - matches := assertDocCount(ref.ResourceId, "Name:foo.pdf", 1) - match := matches[0] - Expect(match.Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) - Expect(match.Entity.Ref.Path).To(Equal(ref.Path)) - Expect(match.Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) - Expect(match.Entity.Name).To(Equal(ri.Path)) - Expect(match.Entity.Size).To(Equal(ri.Size)) - Expect(match.Entity.Type).To(Equal(uint64(ri.Type))) - Expect(match.Entity.MimeType).To(Equal(ri.MimeType)) - Expect(match.Entity.Deleted).To(BeFalse()) - Expect(match.Score > 0).To(BeTrue()) - Expect(uint64(match.Entity.LastModifiedTime.AsTime().Unix())).To(Equal(ri.Mtime.Seconds)) - }) - - It("finds files by name, prefix or substring match", func() { - queries := []string{"foo.pdf", "foo*", "*oo.p*"} - for _, query := range queries { - matches := assertDocCount(ref.ResourceId, query, 1) - Expect(matches[0].Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) - Expect(matches[0].Entity.Ref.Path).To(Equal(ref.Path)) - Expect(matches[0].Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) - Expect(matches[0].Entity.Name).To(Equal(ri.Path)) - Expect(matches[0].Entity.Size).To(Equal(ri.Size)) - } - }) - - It("uses a lower-case index", func() { - assertDocCount(ref.ResourceId, "Name:foo*", 1) - assertDocCount(ref.ResourceId, "Name:Foo*", 0) - }) - - Context("and an additional file in a subdirectory", func() { - var ( - nestedRef *sprovider.Reference - nestedRI *sprovider.ResourceInfo - ) - - 
BeforeEach(func() { - nestedRef = &sprovider.Reference{ - ResourceId: &sprovider.ResourceId{ - StorageId: "provider-1", - SpaceId: "spaceid", - OpaqueId: "rootopaqueid", - }, - Path: "./nested/nestedpdf.pdf", - } - nestedRI = &sprovider.ResourceInfo{ - Id: &sprovider.ResourceId{ - StorageId: "provider-1", - SpaceId: "spaceid", - OpaqueId: "nestedopaqueid", - }, - Path: "nestedpdf.pdf", - Name: "nestedpdf.pdf", - Size: 12345, - } - err := i.Add(nestedRef, nestedRI) - Expect(err).ToNot(HaveOccurred()) - }) - - It("finds files living deeper in the tree by filename, prefix or substring match", func() { - queries := []string{"nestedpdf.pdf", "nested*", "*tedpdf.*"} - for _, query := range queries { - assertDocCount(ref.ResourceId, query, 1) - } - }) - - It("does not find the higher levels when limiting the searched directory", func() { - res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{ - Ref: &searchmsg.Reference{ - ResourceId: &searchmsg.ResourceID{ - StorageId: ref.ResourceId.StorageId, - SpaceId: ref.ResourceId.SpaceId, - OpaqueId: ref.ResourceId.OpaqueId, - }, - Path: "./nested/", - }, - Query: "Name:foo.pdf", - }) - Expect(err).ToNot(HaveOccurred()) - Expect(res).ToNot(BeNil()) - Expect(len(res.Matches)).To(Equal(0)) - }) - }) - }) - }) - }) - - Describe("Add", func() { - It("adds a resourceInfo to the index", func() { - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - count, err := bleveIndex.DocCount() - Expect(err).ToNot(HaveOccurred()) - Expect(count).To(Equal(uint64(1))) - - query := bleve.NewMatchQuery("foo.pdf") - res, err := bleveIndex.Search(bleve.NewSearchRequest(query)) - Expect(err).ToNot(HaveOccurred()) - Expect(res.Hits.Len()).To(Equal(1)) - }) - - It("updates an existing resource in the index", func() { - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - count, _ := bleveIndex.DocCount() - Expect(count).To(Equal(uint64(1))) - - err = i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - count, _ = bleveIndex.DocCount() - 
Expect(count).To(Equal(uint64(1))) - }) - }) - - Describe("Delete", func() { - It("marks a resource as deleted", func() { - err := i.Add(parentRef, parentRi) - Expect(err).ToNot(HaveOccurred()) - assertDocCount(rootId, `sub\ d!r`, 1) - - err = i.Delete(parentRi.Id) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d!r`, 0) - }) - - It("also marks child resources as deleted", func() { - err := i.Add(parentRef, parentRi) - Expect(err).ToNot(HaveOccurred()) - err = i.Add(childRef, childRi) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d\!r`, 1) - assertDocCount(rootId, "child.pdf", 1) - - err = i.Delete(parentRi.Id) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d\!r`, 0) - assertDocCount(rootId, "child.pdf", 0) - }) - }) - - Describe("Restore", func() { - It("also marks child resources as restored", func() { - err := i.Add(parentRef, parentRi) - Expect(err).ToNot(HaveOccurred()) - err = i.Add(childRef, childRi) - Expect(err).ToNot(HaveOccurred()) - err = i.Delete(parentRi.Id) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d!r`, 0) - assertDocCount(rootId, "child.pdf", 0) - - err = i.Restore(parentRi.Id) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d!r`, 1) - assertDocCount(rootId, "child.pdf", 1) - }) - }) - - Describe("Move", func() { - It("renames the parent and its child resources", func() { - err := i.Add(parentRef, parentRi) - Expect(err).ToNot(HaveOccurred()) - err = i.Add(childRef, childRi) - Expect(err).ToNot(HaveOccurred()) - - parentRi.Path = "newname" - err = i.Move(parentRi.Id, parentRi.ParentId, "./my/newname") - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d!r`, 0) - - matches := assertDocCount(rootId, "Name:child.pdf", 1) - Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("parentopaqueid")) - Expect(matches[0].Entity.Ref.Path).To(Equal("./my/newname/child.pdf")) - }) - - It("moves the parent and its child resources", 
func() { - err := i.Add(parentRef, parentRi) - Expect(err).ToNot(HaveOccurred()) - err = i.Add(childRef, childRi) - Expect(err).ToNot(HaveOccurred()) - - parentRi.Path = " " - parentRi.ParentId = &sprovider.ResourceId{ - StorageId: "provider-1", - SpaceId: "spaceid", - OpaqueId: "somewhereopaqueid", - } - err = i.Move(parentRi.Id, parentRi.ParentId, "./somewhere/else/newname") - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(rootId, `sub\ d!r`, 0) - - matches := assertDocCount(rootId, "Name:child.pdf", 1) - Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("parentopaqueid")) - Expect(matches[0].Entity.Ref.Path).To(Equal("./somewhere/else/newname/child.pdf")) - - matches = assertDocCount(rootId, `newname`, 1) - Expect(matches[0].Entity.ParentId.OpaqueId).To(Equal("somewhereopaqueid")) - Expect(matches[0].Entity.Ref.Path).To(Equal("./somewhere/else/newname")) - - }) - }) -}) diff --git a/services/search/pkg/search/mocks/index_client.go b/services/search/pkg/search/mocks/index_client.go deleted file mode 100644 index 2618aeeb393..00000000000 --- a/services/search/pkg/search/mocks/index_client.go +++ /dev/null @@ -1,146 +0,0 @@ -// Code generated by mockery v2.14.1. DO NOT EDIT. 
- -package mocks - -import ( - context "context" - - providerv1beta1 "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - mock "github.com/stretchr/testify/mock" - - v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" -) - -// IndexClient is an autogenerated mock type for the IndexClient type -type IndexClient struct { - mock.Mock -} - -// Add provides a mock function with given fields: ref, ri -func (_m *IndexClient) Add(ref *providerv1beta1.Reference, ri *providerv1beta1.ResourceInfo) error { - ret := _m.Called(ref, ri) - - var r0 error - if rf, ok := ret.Get(0).(func(*providerv1beta1.Reference, *providerv1beta1.ResourceInfo) error); ok { - r0 = rf(ref, ri) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Delete provides a mock function with given fields: id -func (_m *IndexClient) Delete(id *providerv1beta1.ResourceId) error { - ret := _m.Called(id) - - var r0 error - if rf, ok := ret.Get(0).(func(*providerv1beta1.ResourceId) error); ok { - r0 = rf(id) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DocCount provides a mock function with given fields: -func (_m *IndexClient) DocCount() (uint64, error) { - ret := _m.Called() - - var r0 uint64 - if rf, ok := ret.Get(0).(func() uint64); ok { - r0 = rf() - } else { - r0 = ret.Get(0).(uint64) - } - - var r1 error - if rf, ok := ret.Get(1).(func() error); ok { - r1 = rf() - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Move provides a mock function with given fields: id, parentID, fullPath -func (_m *IndexClient) Move(id *providerv1beta1.ResourceId, parentID *providerv1beta1.ResourceId, fullPath string) error { - ret := _m.Called(id, parentID, fullPath) - - var r0 error - if rf, ok := ret.Get(0).(func(*providerv1beta1.ResourceId, *providerv1beta1.ResourceId, string) error); ok { - r0 = rf(id, parentID, fullPath) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Purge provides a mock function with given fields: id -func (_m *IndexClient) Purge(id 
*providerv1beta1.ResourceId) error { - ret := _m.Called(id) - - var r0 error - if rf, ok := ret.Get(0).(func(*providerv1beta1.ResourceId) error); ok { - r0 = rf(id) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Restore provides a mock function with given fields: id -func (_m *IndexClient) Restore(id *providerv1beta1.ResourceId) error { - ret := _m.Called(id) - - var r0 error - if rf, ok := ret.Get(0).(func(*providerv1beta1.ResourceId) error); ok { - r0 = rf(id) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Search provides a mock function with given fields: ctx, req -func (_m *IndexClient) Search(ctx context.Context, req *v0.SearchIndexRequest) (*v0.SearchIndexResponse, error) { - ret := _m.Called(ctx, req) - - var r0 *v0.SearchIndexResponse - if rf, ok := ret.Get(0).(func(context.Context, *v0.SearchIndexRequest) *v0.SearchIndexResponse); ok { - r0 = rf(ctx, req) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*v0.SearchIndexResponse) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *v0.SearchIndexRequest) error); ok { - r1 = rf(ctx, req) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -type mockConstructorTestingTNewIndexClient interface { - mock.TestingT - Cleanup(func()) -} - -// NewIndexClient creates a new instance of IndexClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewIndexClient(t mockConstructorTestingTNewIndexClient) *IndexClient { - mock := &IndexClient{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/services/search/pkg/search/mocks/provider_client.go b/services/search/pkg/search/mocks/provider_client.go deleted file mode 100644 index 5d93c769c10..00000000000 --- a/services/search/pkg/search/mocks/provider_client.go +++ /dev/null @@ -1,77 +0,0 @@ -// Code generated by mockery v2.14.1. DO NOT EDIT. 
- -package mocks - -import ( - context "context" - - mock "github.com/stretchr/testify/mock" - - v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" -) - -// ProviderClient is an autogenerated mock type for the ProviderClient type -type ProviderClient struct { - mock.Mock -} - -// IndexSpace provides a mock function with given fields: ctx, req -func (_m *ProviderClient) IndexSpace(ctx context.Context, req *v0.IndexSpaceRequest) (*v0.IndexSpaceResponse, error) { - ret := _m.Called(ctx, req) - - var r0 *v0.IndexSpaceResponse - if rf, ok := ret.Get(0).(func(context.Context, *v0.IndexSpaceRequest) *v0.IndexSpaceResponse); ok { - r0 = rf(ctx, req) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*v0.IndexSpaceResponse) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *v0.IndexSpaceRequest) error); ok { - r1 = rf(ctx, req) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Search provides a mock function with given fields: ctx, req -func (_m *ProviderClient) Search(ctx context.Context, req *v0.SearchRequest) (*v0.SearchResponse, error) { - ret := _m.Called(ctx, req) - - var r0 *v0.SearchResponse - if rf, ok := ret.Get(0).(func(context.Context, *v0.SearchRequest) *v0.SearchResponse); ok { - r0 = rf(ctx, req) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*v0.SearchResponse) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *v0.SearchRequest) error); ok { - r1 = rf(ctx, req) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -type mockConstructorTestingTNewProviderClient interface { - mock.TestingT - Cleanup(func()) -} - -// NewProviderClient creates a new instance of ProviderClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func NewProviderClient(t mockConstructorTestingTNewProviderClient) *ProviderClient { - mock := &ProviderClient{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/services/search/pkg/search/mocks/searcher.go b/services/search/pkg/search/mocks/searcher.go new file mode 100644 index 00000000000..557e9a0e5aa --- /dev/null +++ b/services/search/pkg/search/mocks/searcher.go @@ -0,0 +1,91 @@ +// Code generated by mockery v2.14.1. DO NOT EDIT. + +package mocks + +import ( + context "context" + + providerv1beta1 "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + mock "github.com/stretchr/testify/mock" + + userv1beta1 "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + + v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" +) + +// Searcher is an autogenerated mock type for the Searcher type +type Searcher struct { + mock.Mock +} + +// IndexSpace provides a mock function with given fields: rID, uID +func (_m *Searcher) IndexSpace(rID *providerv1beta1.StorageSpaceId, uID *userv1beta1.UserId) error { + ret := _m.Called(rID, uID) + + var r0 error + if rf, ok := ret.Get(0).(func(*providerv1beta1.StorageSpaceId, *userv1beta1.UserId) error); ok { + r0 = rf(rID, uID) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MoveItem provides a mock function with given fields: ref, uID +func (_m *Searcher) MoveItem(ref *providerv1beta1.Reference, uID *userv1beta1.UserId) { + _m.Called(ref, uID) +} + +// RestoreItem provides a mock function with given fields: ref, uID +func (_m *Searcher) RestoreItem(ref *providerv1beta1.Reference, uID *userv1beta1.UserId) { + _m.Called(ref, uID) +} + +// Search provides a mock function with given fields: ctx, req +func (_m *Searcher) Search(ctx context.Context, req *v0.SearchRequest) (*v0.SearchResponse, error) { + ret := _m.Called(ctx, req) + + var r0 *v0.SearchResponse + if rf, ok := ret.Get(0).(func(context.Context, *v0.SearchRequest) *v0.SearchResponse); 
ok { + r0 = rf(ctx, req) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v0.SearchResponse) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, *v0.SearchRequest) error); ok { + r1 = rf(ctx, req) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// TrashItem provides a mock function with given fields: rID +func (_m *Searcher) TrashItem(rID *providerv1beta1.ResourceId) { + _m.Called(rID) +} + +// UpsertItem provides a mock function with given fields: ref, uID +func (_m *Searcher) UpsertItem(ref *providerv1beta1.Reference, uID *userv1beta1.UserId) { + _m.Called(ref, uID) +} + +type mockConstructorTestingTNewSearcher interface { + mock.TestingT + Cleanup(func()) +} + +// NewSearcher creates a new instance of Searcher. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewSearcher(t mockConstructorTestingTNewSearcher) *Searcher { + mock := &Searcher{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/services/search/pkg/search/provider/events.go b/services/search/pkg/search/provider/events.go deleted file mode 100644 index 6679d9f62bd..00000000000 --- a/services/search/pkg/search/provider/events.go +++ /dev/null @@ -1,202 +0,0 @@ -package provider - -import ( - "context" - "sync" - "time" - - gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" - user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" - rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" - rpcv1beta1 "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" - provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - revactx "github.com/cs3org/reva/v2/pkg/ctx" - "github.com/cs3org/reva/v2/pkg/errtypes" - "github.com/cs3org/reva/v2/pkg/events" - "github.com/cs3org/reva/v2/pkg/storagespace" - "google.golang.org/grpc/metadata" -) - -// SpaceDebouncer debounces operations on spaces for a configurable amount of time -type SpaceDebouncer 
struct { - after time.Duration - f func(id *provider.StorageSpaceId, userID *user.UserId) - pending map[string]*time.Timer - inProgress sync.Map - - mutex sync.Mutex -} - -// NewSpaceDebouncer returns a new SpaceDebouncer instance -func NewSpaceDebouncer(d time.Duration, f func(id *provider.StorageSpaceId, userID *user.UserId)) *SpaceDebouncer { - return &SpaceDebouncer{ - after: d, - f: f, - pending: map[string]*time.Timer{}, - inProgress: sync.Map{}, - } -} - -// Debounce restars the debounce timer for the given space -func (d *SpaceDebouncer) Debounce(id *provider.StorageSpaceId, userID *user.UserId) { - d.mutex.Lock() - defer d.mutex.Unlock() - - if t := d.pending[id.OpaqueId]; t != nil { - t.Stop() - } - - d.pending[id.OpaqueId] = time.AfterFunc(d.after, func() { - if _, ok := d.inProgress.Load(id.OpaqueId); ok { - // Reschedule this run for when the previous run has finished - d.mutex.Lock() - d.pending[id.OpaqueId].Reset(d.after) - d.mutex.Unlock() - return - } - - d.inProgress.Store(id.OpaqueId, true) - defer d.inProgress.Delete(id.OpaqueId) - d.f(id, userID) - }) -} - -func (p *Provider) handleEvent(ev interface{}) { - switch e := ev.(type) { - case events.ItemTrashed: - p.logger.Debug().Interface("event", ev).Msg("marking document as deleted") - err := p.indexClient.Delete(e.ID) - if err != nil { - p.logger.Error().Err(err).Interface("Id", e.ID).Msg("failed to remove item from index") - } - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.ItemRestored: - p.logger.Debug().Interface("event", ev).Msg("marking document as restored") - owner := &user.User{ - Id: e.Executant, - } - - ownerCtx, err := p.getAuthContext(owner) - if err != nil { - return - } - statRes, err := p.statResource(ownerCtx, e.Ref, owner) - if err != nil { - p.logger.Error().Err(err). - Str("storageid", e.Ref.GetResourceId().GetStorageId()). - Str("spaceid", e.Ref.GetResourceId().GetSpaceId()). - Str("opaqueid", e.Ref.GetResourceId().GetOpaqueId()). 
- Str("path", e.Ref.GetPath()). - Msg("failed to make stat call for the restored resource") - return - } - - switch statRes.Status.Code { - case rpc.Code_CODE_OK: - err = p.indexClient.Restore(statRes.Info.Id) - if err != nil { - p.logger.Error().Err(err). - Str("storageid", e.Ref.GetResourceId().GetStorageId()). - Str("spaceid", e.Ref.GetResourceId().GetSpaceId()). - Str("opaqueid", e.Ref.GetResourceId().GetOpaqueId()). - Str("path", e.Ref.GetPath()). - Msg("failed to restore the changed resource in the index") - } - default: - p.logger.Error().Interface("statRes", statRes). - Str("storageid", e.Ref.GetResourceId().GetStorageId()). - Str("spaceid", e.Ref.GetResourceId().GetSpaceId()). - Str("opaqueid", e.Ref.GetResourceId().GetOpaqueId()). - Str("path", e.Ref.GetPath()). - Msg("failed to stat the restored resource") - } - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.ItemMoved: - p.logger.Debug().Interface("event", ev).Msg("resource has been moved, updating the document") - owner := &user.User{ - Id: e.Executant, - } - - ownerCtx, err := p.getAuthContext(owner) - if err != nil { - return - } - statRes, err := p.statResource(ownerCtx, e.Ref, owner) - if err != nil { - p.logger.Error().Err(err).Msg("failed to stat the moved resource") - return - } - if statRes.Status.Code != rpc.Code_CODE_OK { - p.logger.Error().Interface("statRes", statRes).Msg("failed to stat the moved resource") - return - } - - gpRes, err := p.getPath(ownerCtx, statRes.Info.Id, owner) - if err != nil { - p.logger.Error().Err(err).Interface("ref", e.Ref).Msg("failed to get path for moved resource") - return - } - if gpRes.Status.Code != rpcv1beta1.Code_CODE_OK { - p.logger.Error().Interface("status", gpRes.Status).Interface("ref", e.Ref).Msg("failed to get path for moved resource") - return - } - - err = p.indexClient.Move(statRes.GetInfo().GetId(), statRes.GetInfo().GetParentId(), gpRes.Path) - if err != nil { - p.logger.Error().Err(err).Msg("failed to move the changed 
resource in the index") - } - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.ContainerCreated: - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.FileUploaded: - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.FileTouched: - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - case events.FileVersionRestored: - p.reindexSpace(ev, e.Ref, e.Executant, e.SpaceOwner) - default: - // Not sure what to do here. Skip. - return - } -} - -func (p *Provider) reindexSpace(ev interface{}, ref *provider.Reference, executant, owner *user.UserId) { - p.logger.Debug().Interface("event", ev).Msg("resource has been changed, scheduling a space resync") - - spaceID := &provider.StorageSpaceId{ - OpaqueId: storagespace.FormatResourceID(provider.ResourceId{ - StorageId: ref.GetResourceId().GetStorageId(), - SpaceId: ref.GetResourceId().GetSpaceId(), - }), - } - if owner != nil { - p.indexSpaceDebouncer.Debounce(spaceID, owner) - } else { - p.indexSpaceDebouncer.Debounce(spaceID, executant) - } -} - -func (p *Provider) statResource(ctx context.Context, ref *provider.Reference, owner *user.User) (*provider.StatResponse, error) { - return p.gwClient.Stat(ctx, &provider.StatRequest{Ref: ref}) -} - -func (p *Provider) getPath(ctx context.Context, id *provider.ResourceId, owner *user.User) (*provider.GetPathResponse, error) { - return p.gwClient.GetPath(ctx, &provider.GetPathRequest{ResourceId: id}) -} - -func (p *Provider) getAuthContext(owner *user.User) (context.Context, error) { - ownerCtx := revactx.ContextSetUser(context.Background(), owner) - authRes, err := p.gwClient.Authenticate(ownerCtx, &gateway.AuthenticateRequest{ - Type: "machine", - ClientId: "userid:" + owner.GetId().GetOpaqueId(), - ClientSecret: p.machineAuthAPIKey, - }) - if err == nil && authRes.GetStatus().GetCode() != rpc.Code_CODE_OK { - err = errtypes.NewErrtypeFromStatus(authRes.Status) - } - if err != nil { - p.logger.Error().Err(err).Interface("owner", 
owner).Interface("authRes", authRes).Msg("error using machine auth") - return nil, err - } - return metadata.AppendToOutgoingContext(ownerCtx, revactx.TokenHeader, authRes.Token), nil -} diff --git a/services/search/pkg/search/provider/events_test.go b/services/search/pkg/search/provider/events_test.go deleted file mode 100644 index 980b664ce19..00000000000 --- a/services/search/pkg/search/provider/events_test.go +++ /dev/null @@ -1,258 +0,0 @@ -package provider_test - -import ( - "context" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "github.com/stretchr/testify/mock" - - gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" - user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" - userv1beta1 "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" - sprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - "github.com/cs3org/reva/v2/pkg/events" - "github.com/cs3org/reva/v2/pkg/rgrpc/status" - "github.com/cs3org/reva/v2/pkg/utils" - cs3mocks "github.com/cs3org/reva/v2/tests/cs3mocks/mocks" - "github.com/owncloud/ocis/v2/ocis-pkg/log" - "github.com/owncloud/ocis/v2/services/search/pkg/search/mocks" - provider "github.com/owncloud/ocis/v2/services/search/pkg/search/provider" -) - -var _ = Describe("Searchprovider", func() { - var ( - p *provider.Provider - gwClient *cs3mocks.GatewayAPIClient - indexClient *mocks.IndexClient - debouncedIndexCalls int - - ctx context.Context - eventsChan chan interface{} - - logger = log.NewLogger() - user = &userv1beta1.User{ - Id: &userv1beta1.UserId{ - OpaqueId: "user", - }, - } - - ref = &sprovider.Reference{ - ResourceId: &sprovider.ResourceId{ - StorageId: "storageid", - SpaceId: "rootopaqueid", - OpaqueId: "rootopaqueid", - }, - Path: "./foo.pdf", - } - ri = &sprovider.ResourceInfo{ - Id: &sprovider.ResourceId{ - StorageId: "storageid", - SpaceId: "rootopaqueid", - OpaqueId: "opaqueid", - }, - Path: "foo.pdf", - Size: 12345, - Mtime: 
utils.TimeToTS(time.Now().Add(-time.Hour)), - } - ) - - BeforeEach(func() { - ctx = context.Background() - eventsChan = make(chan interface{}) - gwClient = &cs3mocks.GatewayAPIClient{} - indexClient = &mocks.IndexClient{} - - debouncedIndexCalls = 0 - debouncer := provider.NewSpaceDebouncer(100*time.Millisecond, func(id *sprovider.StorageSpaceId, userID *userv1beta1.UserId) { - debouncedIndexCalls += 1 - }) - - p = provider.NewWithDebouncer(gwClient, indexClient, "", eventsChan, logger, debouncer) - - gwClient.On("Authenticate", mock.Anything, mock.Anything).Return(&gateway.AuthenticateResponse{ - Status: status.NewOK(ctx), - Token: "authtoken", - }, nil) - gwClient.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{ - Status: status.NewOK(context.Background()), - Info: ri, - }, nil) - indexClient.On("DocCount").Return(uint64(1), nil) - }) - - Describe("New", func() { - It("returns a new instance", func() { - p = provider.New(gwClient, indexClient, "", eventsChan, 1000, logger) - Expect(p).ToNot(BeNil()) - }) - }) - - Describe("events", func() { - Context("with the file being in the original location", func() { - BeforeEach(func() { - gwClient.On("GetPath", mock.Anything, mock.MatchedBy(func(req *sprovider.GetPathRequest) bool { - return req.ResourceId.OpaqueId == ri.Id.OpaqueId - })).Return(&sprovider.GetPathResponse{ - Status: status.NewOK(context.Background()), - Path: ri.Path, - }, nil) - }) - - It("triggers an index update when a file has been uploaded", func() { - eventsChan <- events.FileUploaded{ - Ref: ref, - Executant: user.Id, - } - - Eventually(func() int { - return debouncedIndexCalls - }, "2s").Should(Equal(1)) - }) - - It("triggers an index update when a file has been touched", func() { - eventsChan <- events.FileTouched{ - Ref: ref, - Executant: user.Id, - } - - Eventually(func() int { - return debouncedIndexCalls - }, "2s").Should(Equal(1)) - }) - - It("removes an entry from the index when the file has been deleted", func() { - 
called := false - gwClient.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{ - Status: status.NewNotFound(context.Background(), ""), - }, nil) - indexClient.On("Delete", mock.MatchedBy(func(id *sprovider.ResourceId) bool { - return id.OpaqueId == ri.Id.OpaqueId - })).Return(nil).Run(func(args mock.Arguments) { - called = true - }) - eventsChan <- events.ItemTrashed{ - Ref: ref, - ID: ri.Id, - Executant: user.Id, - } - - Eventually(func() bool { - return called - }, "2s").Should(BeTrue()) - }) - - It("indexes items when they are being restored", func() { - called := false - indexClient.On("Restore", mock.MatchedBy(func(id *sprovider.ResourceId) bool { - return id.OpaqueId == ri.Id.OpaqueId - })).Return(nil).Run(func(args mock.Arguments) { - called = true - }) - eventsChan <- events.ItemRestored{ - Ref: ref, - Executant: user.Id, - } - - Eventually(func() bool { - return called - }, "2s").Should(BeTrue()) - }) - - It("indexes items when a version has been restored", func() { - eventsChan <- events.FileVersionRestored{ - Ref: ref, - Executant: user.Id, - } - - Eventually(func() int { - return debouncedIndexCalls - }, "2s").Should(Equal(1)) - }) - }) - - It("indexes items when they are being moved", func() { - called := false - gwClient.On("GetPath", mock.Anything, mock.Anything).Return(&sprovider.GetPathResponse{ - Status: status.NewOK(ctx), - Path: "./new/path.pdf", - }, nil) - indexClient.On("Move", mock.MatchedBy(func(id *sprovider.ResourceId) bool { - return id.OpaqueId == ri.Id.OpaqueId - }), mock.Anything, "./new/path.pdf").Return(nil).Run(func(args mock.Arguments) { - called = true - }) - ref.Path = "./new/path.pdf" - eventsChan <- events.ItemMoved{ - Ref: ref, - Executant: user.Id, - } - - Eventually(func() bool { - return called - }, "2s").Should(BeTrue()) - }) - }) -}) - -var _ = Describe("SpaceDebouncer", func() { - var ( - debouncer *provider.SpaceDebouncer - callCount map[string]int - - userId = &user.UserId{ - OpaqueId: "user", - } 
- spaceid = &sprovider.StorageSpaceId{ - OpaqueId: "spaceid", - } - ) - - BeforeEach(func() { - callCount = map[string]int{} - debouncer = provider.NewSpaceDebouncer(50*time.Millisecond, func(id *sprovider.StorageSpaceId, _ *user.UserId) { - callCount[id.OpaqueId] += 1 - }) - }) - - It("debounces", func() { - debouncer.Debounce(spaceid, userId) - debouncer.Debounce(spaceid, userId) - debouncer.Debounce(spaceid, userId) - Eventually(func() int { - return callCount["spaceid"] - }, "200ms").Should(Equal(1)) - }) - - It("works multiple times", func() { - debouncer.Debounce(spaceid, userId) - debouncer.Debounce(spaceid, userId) - debouncer.Debounce(spaceid, userId) - time.Sleep(100 * time.Millisecond) - - debouncer.Debounce(spaceid, userId) - debouncer.Debounce(spaceid, userId) - - Eventually(func() int { - return callCount["spaceid"] - }, "200ms").Should(Equal(2)) - }) - - It("doesn't trigger twice simultaneously", func() { - debouncer = provider.NewSpaceDebouncer(50*time.Millisecond, func(id *sprovider.StorageSpaceId, _ *user.UserId) { - callCount[id.OpaqueId] += 1 - time.Sleep(300 * time.Millisecond) - }) - debouncer.Debounce(spaceid, userId) - time.Sleep(100 * time.Millisecond) // Let it trigger once - - debouncer.Debounce(spaceid, userId) - time.Sleep(100 * time.Millisecond) // shouldn't trigger as the other run is still in progress - Expect(callCount["spaceid"]).To(Equal(1)) - - Eventually(func() int { - return callCount["spaceid"] - }, "500ms").Should(Equal(2)) - }) -}) diff --git a/services/search/pkg/search/provider/searchprovider.go b/services/search/pkg/search/provider/searchprovider.go deleted file mode 100644 index fc23c0ec8bf..00000000000 --- a/services/search/pkg/search/provider/searchprovider.go +++ /dev/null @@ -1,418 +0,0 @@ -package provider - -import ( - "context" - "fmt" - "path/filepath" - "sort" - "strings" - "time" - - "go.opentelemetry.io/otel/attribute" - "google.golang.org/grpc/metadata" - - gateway 
"github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" - user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" - rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" - rpcv1beta1 "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" - provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - revactx "github.com/cs3org/reva/v2/pkg/ctx" - "github.com/cs3org/reva/v2/pkg/errtypes" - "github.com/cs3org/reva/v2/pkg/events" - sdk "github.com/cs3org/reva/v2/pkg/sdk/common" - "github.com/cs3org/reva/v2/pkg/storage/utils/walker" - "github.com/cs3org/reva/v2/pkg/storagespace" - "github.com/cs3org/reva/v2/pkg/utils" - "github.com/owncloud/ocis/v2/ocis-pkg/log" - "github.com/owncloud/ocis/v2/services/search/pkg/search" - searchTracing "github.com/owncloud/ocis/v2/services/search/pkg/tracing" - - searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" - searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" -) - -// Permissions is copied from reva internal conversion pkg -type Permissions uint - -// consts are copied from reva internal conversion pkg -const ( - // PermissionInvalid represents an invalid permission - PermissionInvalid Permissions = 0 - // PermissionRead grants read permissions on a resource - PermissionRead Permissions = 1 << (iota - 1) - // PermissionWrite grants write permissions on a resource - PermissionWrite - // PermissionCreate grants create permissions on a resource - PermissionCreate - // PermissionDelete grants delete permissions on a resource - PermissionDelete - // PermissionShare grants share permissions on a resource - PermissionShare -) - -var ListenEvents = []events.Unmarshaller{ - events.ItemTrashed{}, - events.ItemRestored{}, - events.ItemMoved{}, - events.ContainerCreated{}, - events.FileUploaded{}, - events.FileTouched{}, - events.FileVersionRestored{}, -} - -type Provider struct { - logger log.Logger - gwClient gateway.GatewayAPIClient - indexClient search.IndexClient - machineAuthAPIKey 
string - - indexSpaceDebouncer *SpaceDebouncer -} - -type MatchArray []*searchmsg.Match - -func (s MatchArray) Len() int { - return len(s) -} -func (s MatchArray) Swap(i, j int) { - s[i], s[j] = s[j], s[i] -} -func (s MatchArray) Less(i, j int) bool { - return s[i].Score > s[j].Score -} - -func New(gwClient gateway.GatewayAPIClient, indexClient search.IndexClient, machineAuthAPIKey string, eventsChan <-chan interface{}, debounceDuration int, logger log.Logger) *Provider { - p := &Provider{ - gwClient: gwClient, - indexClient: indexClient, - machineAuthAPIKey: machineAuthAPIKey, - logger: logger, - } - - p.indexSpaceDebouncer = NewSpaceDebouncer(time.Duration(debounceDuration)*time.Millisecond, func(id *provider.StorageSpaceId, userID *user.UserId) { - err := p.doIndexSpace(context.Background(), id, userID) - if err != nil { - p.logger.Error().Err(err).Interface("spaceID", id).Interface("userID", userID).Msg("error while indexing a space") - } - }) - - go func() { - for { - ev := <-eventsChan - go func() { - time.Sleep(1 * time.Second) // Give some time to let everything settle down before trying to access it when indexing - p.handleEvent(ev) - }() - } - }() - - return p -} - -// NewWithDebouncer returns a new provider with a customer index space debouncer -func NewWithDebouncer(gwClient gateway.GatewayAPIClient, indexClient search.IndexClient, machineAuthAPIKey string, eventsChan <-chan interface{}, logger log.Logger, debouncer *SpaceDebouncer) *Provider { - p := New(gwClient, indexClient, machineAuthAPIKey, eventsChan, 0, logger) - p.indexSpaceDebouncer = debouncer - return p -} - -func (p *Provider) Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error) { - ctx, span := searchTracing.TraceProvider.Tracer("search").Start(ctx, "search") - defer span.End() - span.SetAttributes(attribute.String("query", req.GetQuery())) - if req.Query == "" { - return nil, errtypes.BadRequest("empty query provided") - } - 
p.logger.Debug().Str("query", req.Query).Msg("performing a search") - - listSpacesRes, err := p.gwClient.ListStorageSpaces(ctx, &provider.ListStorageSpacesRequest{ - Filters: []*provider.ListStorageSpacesRequest_Filter{ - { - Type: provider.ListStorageSpacesRequest_Filter_TYPE_SPACE_TYPE, - Term: &provider.ListStorageSpacesRequest_Filter_SpaceType{SpaceType: "+grant"}, - }, - }, - }) - if err != nil { - p.logger.Error().Err(err).Msg("failed to list the user's storage spaces") - return nil, err - } - - mountpointMap := map[string]string{} - for _, space := range listSpacesRes.StorageSpaces { - if space.SpaceType != "mountpoint" { - continue - } - opaqueMap := sdk.DecodeOpaqueMap(space.Opaque) - grantSpaceId := storagespace.FormatResourceID(provider.ResourceId{ - StorageId: opaqueMap["grantStorageID"], - SpaceId: opaqueMap["grantSpaceID"], - OpaqueId: opaqueMap["grantOpaqueID"], - }) - mountpointMap[grantSpaceId] = space.Id.OpaqueId - } - - matches := MatchArray{} - total := int32(0) - for _, space := range listSpacesRes.StorageSpaces { - searchRootId := &searchmsg.ResourceID{ - StorageId: space.Root.StorageId, - SpaceId: space.Root.SpaceId, - OpaqueId: space.Root.OpaqueId, - } - - if req.Ref != nil && - (req.Ref.ResourceId.StorageId != searchRootId.StorageId || - req.Ref.ResourceId.SpaceId != searchRootId.SpaceId || - req.Ref.ResourceId.OpaqueId != searchRootId.OpaqueId) { - continue - } - - var ( - mountpointRootID *searchmsg.ResourceID - rootName string - permissions *provider.ResourcePermissions - ) - mountpointPrefix := "" - switch space.SpaceType { - case "mountpoint": - continue // mountpoint spaces are only "links" to the shared spaces. 
we have to search the shared "grant" space instead - case "grant": - // In case of grant spaces we search the root of the outer space and translate the paths to the according mountpoint - searchRootId.OpaqueId = space.Root.SpaceId - mountpointID, ok := mountpointMap[space.Id.OpaqueId] - if !ok { - p.logger.Warn().Interface("space", space).Msg("could not find mountpoint space for grant space") - continue - } - gpRes, err := p.gwClient.GetPath(ctx, &provider.GetPathRequest{ - ResourceId: space.Root, - }) - if err != nil { - p.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Msg("failed to get path for grant space root") - continue - } - if gpRes.Status.Code != rpcv1beta1.Code_CODE_OK { - p.logger.Error().Interface("status", gpRes.Status).Str("space", space.Id.OpaqueId).Msg("failed to get path for grant space root") - continue - } - mountpointPrefix = utils.MakeRelativePath(gpRes.Path) - sid, spid, oid, err := storagespace.SplitID(mountpointID) - if err != nil { - p.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Str("mountpointId", mountpointID).Msg("invalid mountpoint space id") - continue - } - mountpointRootID = &searchmsg.ResourceID{ - StorageId: sid, - SpaceId: spid, - OpaqueId: oid, - } - - rootName = filepath.Join("/", filepath.Base(gpRes.GetPath())) - permissions = space.GetRootInfo().GetPermissionSet() - p.logger.Debug().Interface("grantSpace", space).Interface("mountpointRootId", mountpointRootID).Msg("searching a grant") - case "personal": - permissions = space.GetRootInfo().GetPermissionSet() - } - - res, err := p.indexClient.Search(ctx, &searchsvc.SearchIndexRequest{ - Query: formatQuery(req.Query), - Ref: &searchmsg.Reference{ - ResourceId: searchRootId, - Path: mountpointPrefix, - }, - PageSize: req.PageSize, - }) - if err != nil { - p.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Msg("failed to search the index") - return nil, err - } - p.logger.Debug().Str("space", space.Id.OpaqueId).Int("hits", len(res.Matches)).Msg("space 
search done") - - total += res.TotalMatches - for _, match := range res.Matches { - if mountpointPrefix != "" { - match.Entity.Ref.Path = utils.MakeRelativePath(strings.TrimPrefix(match.Entity.Ref.Path, mountpointPrefix)) - } - if mountpointRootID != nil { - match.Entity.Ref.ResourceId = mountpointRootID - } - match.Entity.ShareRootName = rootName - - isShared := match.GetEntity().GetRef().GetResourceId().GetSpaceId() == utils.ShareStorageSpaceID - isMountpoint := isShared && match.GetEntity().GetRef().GetPath() == "." - isDir := match.GetEntity().GetMimeType() == "httpd/unix-directory" - match.Entity.Permissions = convertToWebDAVPermissions(isShared, isMountpoint, isDir, permissions) - matches = append(matches, match) - } - } - - // compile one sorted list of matches from all spaces and apply the limit if needed - sort.Sort(matches) - span.SetAttributes(attribute.Int("num_matches", len(matches))) - span.SetAttributes(attribute.Int("total_matches", int(total))) - limit := req.PageSize - if limit == 0 { - limit = 200 - } - if int32(len(matches)) > limit { - matches = matches[0:limit] - } - - return &searchsvc.SearchResponse{ - Matches: matches, - TotalMatches: total, - }, nil -} - -func (p *Provider) IndexSpace(ctx context.Context, req *searchsvc.IndexSpaceRequest) (*searchsvc.IndexSpaceResponse, error) { - err := p.doIndexSpace(ctx, &provider.StorageSpaceId{OpaqueId: req.SpaceId}, &user.UserId{OpaqueId: req.UserId}) - if err != nil { - return nil, err - } - return &searchsvc.IndexSpaceResponse{}, nil -} - -func (p *Provider) doIndexSpace(ctx context.Context, spaceID *provider.StorageSpaceId, userID *user.UserId) error { - ctx, span := searchTracing.TraceProvider.Tracer("search").Start(ctx, "index space") - defer span.End() - authRes, err := p.gwClient.Authenticate(ctx, &gateway.AuthenticateRequest{ - Type: "machine", - ClientId: "userid:" + userID.OpaqueId, - ClientSecret: p.machineAuthAPIKey, - }) - if err != nil || authRes.GetStatus().GetCode() != 
rpc.Code_CODE_OK { - return err - } - span.SetAttributes(attribute.String("user_id", userID.GetOpaqueId())) - span.SetAttributes(attribute.String("space_id", spaceID.GetOpaqueId())) - - if authRes.GetStatus().GetCode() != rpc.Code_CODE_OK { - return fmt.Errorf("could not get authenticated context for user") - } - ownerCtx := metadata.AppendToOutgoingContext(ctx, revactx.TokenHeader, authRes.Token) - - // Walk the space and index all files - walker := walker.NewWalker(p.gwClient) - rootID, err := storagespace.ParseID(spaceID.OpaqueId) - if err != nil { - p.logger.Error().Err(err).Msg("invalid space id") - return err - } - if rootID.StorageId == "" || rootID.SpaceId == "" { - p.logger.Error().Err(err).Msg("invalid space id") - return fmt.Errorf("invalid space id") - } - rootID.OpaqueId = rootID.SpaceId - - err = walker.Walk(ownerCtx, &rootID, func(wd string, info *provider.ResourceInfo, err error) error { - if err != nil { - p.logger.Error().Err(err).Msg("error walking the tree") - return err - } - - if info == nil { - return nil - } - - ref := &provider.Reference{ - Path: utils.MakeRelativePath(filepath.Join(wd, info.Path)), - ResourceId: &rootID, - } - p.logger.Debug().Str("path", ref.Path).Msg("Walking tree") - - // Has this item/subtree changed? - searchRes, err := p.indexClient.Search(ownerCtx, &searchsvc.SearchIndexRequest{ - Query: "+ID:" + storagespace.FormatResourceID(*info.Id) + ` +Mtime:>="` + utils.TSToTime(info.Mtime).Format(time.RFC3339Nano) + `"`, - }) - if err == nil && len(searchRes.Matches) >= 1 { - if info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER { - p.logger.Debug().Str("path", ref.Path).Msg("subtree hasn't changed. Skipping.") - return filepath.SkipDir - } - p.logger.Debug().Str("path", ref.Path).Msg("element hasn't changed. 
Skipping.") - return nil - } - - err = p.indexClient.Add(ref, info) - if err != nil { - p.logger.Error().Err(err).Msg("error adding resource to the index") - } else { - p.logger.Debug().Interface("ref", ref).Msg("added resource to index") - } - return nil - }) - if err != nil { - return err - } - - p.logDocCount() - return nil -} - -func (p *Provider) logDocCount() { - c, err := p.indexClient.DocCount() - if err != nil { - p.logger.Error().Err(err).Msg("error getting document count from the index") - } - p.logger.Debug().Interface("count", c).Msg("new document count") -} - -func formatQuery(q string) string { - query := q - fields := []string{"RootID", "Path", "ID", "Name", "Size", "Mtime", "MimeType", "Type"} - for _, field := range fields { - query = strings.ReplaceAll(query, strings.ToLower(field)+":", field+":") - } - - if strings.Contains(query, ":") { - return query // Sophisticated field based search - } - - // this is a basic filename search - return "Name:*" + strings.ReplaceAll(strings.ToLower(query), " ", `\ `) + "*" -} - -// NOTE: this converts CS3 to WebDAV permissions -// since conversions pkg is reva internal we have no other choice than to duplicate the logic -func convertToWebDAVPermissions(isShared, isMountpoint, isDir bool, p *provider.ResourcePermissions) string { - if p == nil { - return "" - } - var b strings.Builder - if isShared { - fmt.Fprintf(&b, "S") - } - if p.ListContainer && - p.ListFileVersions && - p.ListRecycle && - p.Stat && - p.GetPath && - p.GetQuota && - p.InitiateFileDownload { - fmt.Fprintf(&b, "R") - } - if isMountpoint { - fmt.Fprintf(&b, "M") - } - if p.Delete && - p.PurgeRecycle { - fmt.Fprintf(&b, "D") - } - if p.InitiateFileUpload && - p.RestoreFileVersion && - p.RestoreRecycleItem { - fmt.Fprintf(&b, "NV") - if !isDir { - fmt.Fprintf(&b, "W") - } - } - if isDir && - p.ListContainer && - p.Stat && - p.CreateContainer && - p.InitiateFileUpload { - fmt.Fprintf(&b, "CK") - } - return b.String() -} diff --git 
a/services/search/pkg/search/search.go b/services/search/pkg/search/search.go index acbbbd9c45f..954ea55f546 100644 --- a/services/search/pkg/search/search.go +++ b/services/search/pkg/search/search.go @@ -1,46 +1,141 @@ -// Copyright 2018-2022 CERN -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// In applying this license, CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- package search import ( "context" + "errors" + "fmt" + "strings" - providerv1beta1 "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" - searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + ctxpkg "github.com/cs3org/reva/v2/pkg/ctx" + "github.com/cs3org/reva/v2/pkg/errtypes" + "github.com/cs3org/reva/v2/pkg/utils" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/engine" + "google.golang.org/grpc/metadata" ) -//go:generate mockery --name=ProviderClient -//go:generate mockery --name=IndexClient +// ResolveReference makes sure the path is relative to the space root +func ResolveReference(ctx context.Context, ref *provider.Reference, ri *provider.ResourceInfo, gw gateway.GatewayAPIClient) (*provider.Reference, error) { + if ref.GetResourceId().GetOpaqueId() == ref.GetResourceId().GetSpaceId() { + return ref, nil + } + + gpRes, err := gw.GetPath(ctx, &provider.GetPathRequest{ + ResourceId: ri.Id, + }) + if err != nil || gpRes.Status.Code != rpc.Code_CODE_OK { + return nil, err + } + return &provider.Reference{ + ResourceId: &provider.ResourceId{ + StorageId: ref.GetResourceId().GetStorageId(), + SpaceId: ref.GetResourceId().GetSpaceId(), + OpaqueId: ref.GetResourceId().GetSpaceId(), + }, + Path: utils.MakeRelativePath(gpRes.Path), + }, nil +} + +type matchArray []*searchmsg.Match + +func (ma matchArray) Len() int { + return len(ma) +} +func (ma matchArray) Swap(i, j int) { + ma[i], ma[j] = ma[j], ma[i] +} +func (ma matchArray) Less(i, j int) bool { + return ma[i].Score > ma[j].Score +} + +func logDocCount(engine engine.Engine, logger log.Logger) { + c, err := 
engine.DocCount() + if err != nil { + logger.Error().Err(err).Msg("error getting document count from the index") + } + logger.Debug().Interface("count", c).Msg("new document count") +} + +func getAuthContext(owner *user.User, gw gateway.GatewayAPIClient, secret string, logger log.Logger) (context.Context, error) { + ownerCtx := ctxpkg.ContextSetUser(context.Background(), owner) + authRes, err := gw.Authenticate(ownerCtx, &gateway.AuthenticateRequest{ + Type: "machine", + ClientId: "userid:" + owner.GetId().GetOpaqueId(), + ClientSecret: secret, + }) + + if err == nil && authRes.GetStatus().GetCode() != rpc.Code_CODE_OK { + err = errtypes.NewErrtypeFromStatus(authRes.Status) + } + + if err != nil { + logger.Error().Err(err).Interface("owner", owner).Interface("authRes", authRes).Msg("error using machine auth") + return nil, err + } + + return metadata.AppendToOutgoingContext(ownerCtx, ctxpkg.TokenHeader, authRes.Token), nil +} + +func statResource(ctx context.Context, ref *provider.Reference, gw gateway.GatewayAPIClient, logger log.Logger) (*provider.StatResponse, error) { + res, err := gw.Stat(ctx, &provider.StatRequest{Ref: ref}) + if err != nil { + logger.Error().Err(err).Msg("failed to stat the moved resource") + return nil, err + } + if res.Status.Code != rpc.Code_CODE_OK { + err := errors.New("failed to stat the moved resource") + logger.Error().Interface("res", res).Msg(err.Error()) + return nil, err + } -// ProviderClient is the interface to the search provider service -type ProviderClient interface { - Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error) - IndexSpace(ctx context.Context, req *searchsvc.IndexSpaceRequest) (*searchsvc.IndexSpaceResponse, error) + return res, nil } -// IndexClient is the interface to the search index -type IndexClient interface { - Search(ctx context.Context, req *searchsvc.SearchIndexRequest) (*searchsvc.SearchIndexResponse, error) - Add(ref *providerv1beta1.Reference, ri 
*providerv1beta1.ResourceInfo) error - Move(id, parentID *providerv1beta1.ResourceId, fullPath string) error - Delete(id *providerv1beta1.ResourceId) error - Restore(id *providerv1beta1.ResourceId) error - Purge(id *providerv1beta1.ResourceId) error - DocCount() (uint64, error) +// NOTE: this converts CS3 to WebDAV permissions +// since conversions pkg is reva internal we have no other choice than to duplicate the logic +func convertToWebDAVPermissions(isShared, isMountpoint, isDir bool, p *provider.ResourcePermissions) string { + if p == nil { + return "" + } + var b strings.Builder + if isShared { + fmt.Fprintf(&b, "S") + } + if p.ListContainer && + p.ListFileVersions && + p.ListRecycle && + p.Stat && + p.GetPath && + p.GetQuota && + p.InitiateFileDownload { + fmt.Fprintf(&b, "R") + } + if isMountpoint { + fmt.Fprintf(&b, "M") + } + if p.Delete && + p.PurgeRecycle { + fmt.Fprintf(&b, "D") + } + if p.InitiateFileUpload && + p.RestoreFileVersion && + p.RestoreRecycleItem { + fmt.Fprintf(&b, "NV") + if !isDir { + fmt.Fprintf(&b, "W") + } + } + if isDir && + p.ListContainer && + p.Stat && + p.CreateContainer && + p.InitiateFileUpload { + fmt.Fprintf(&b, "CK") + } + return b.String() } diff --git a/services/search/pkg/search/service.go b/services/search/pkg/search/service.go new file mode 100644 index 00000000000..2ba8dd9ba2a --- /dev/null +++ b/services/search/pkg/search/service.go @@ -0,0 +1,355 @@ +package search + +import ( + "context" + "fmt" + "path/filepath" + "sort" + "strings" + "time" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + rpcv1beta1 "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/v2/pkg/errtypes" + sdk "github.com/cs3org/reva/v2/pkg/sdk/common" + "github.com/cs3org/reva/v2/pkg/storage/utils/walker" + "github.com/cs3org/reva/v2/pkg/storagespace" + 
"github.com/cs3org/reva/v2/pkg/utils" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/config" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + "github.com/owncloud/ocis/v2/services/search/pkg/engine" +) + +//go:generate mockery --name=Searcher + +// Searcher is the interface to the SearchService +type Searcher interface { + Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error) + IndexSpace(rID *provider.StorageSpaceId, uID *user.UserId) error + TrashItem(rID *provider.ResourceId) + UpsertItem(ref *provider.Reference, uID *user.UserId) + RestoreItem(ref *provider.Reference, uID *user.UserId) + MoveItem(ref *provider.Reference, uID *user.UserId) +} + +// Service is responsible for indexing spaces and pass on a search +// to it's underlying engine. +type Service struct { + logger log.Logger + gateway gateway.GatewayAPIClient + engine engine.Engine + extractor content.Extractor + secret string +} + +// NewService creates a new Provider instance. +func NewService(gw gateway.GatewayAPIClient, eng engine.Engine, extractor content.Extractor, logger log.Logger, cfg *config.Config) *Service { + var s = &Service{ + gateway: gw, + engine: eng, + secret: cfg.MachineAuthAPIKey, + logger: logger, + extractor: extractor, + } + + return s +} + +// Search processes a search request and passes it down to the engine. 
+func (s *Service) Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error) { + if req.Query == "" { + return nil, errtypes.BadRequest("empty query provided") + } + s.logger.Debug().Str("query", req.Query).Msg("performing a search") + + listSpacesRes, err := s.gateway.ListStorageSpaces(ctx, &provider.ListStorageSpacesRequest{ + Filters: []*provider.ListStorageSpacesRequest_Filter{ + { + Type: provider.ListStorageSpacesRequest_Filter_TYPE_SPACE_TYPE, + Term: &provider.ListStorageSpacesRequest_Filter_SpaceType{SpaceType: "+grant"}, + }, + }, + }) + if err != nil { + s.logger.Error().Err(err).Msg("failed to list the user's storage spaces") + return nil, err + } + + mountpointMap := map[string]string{} + for _, space := range listSpacesRes.StorageSpaces { + if space.SpaceType != "mountpoint" { + continue + } + opaqueMap := sdk.DecodeOpaqueMap(space.Opaque) + grantSpaceID := storagespace.FormatResourceID(provider.ResourceId{ + StorageId: opaqueMap["grantStorageID"], + SpaceId: opaqueMap["grantSpaceID"], + OpaqueId: opaqueMap["grantOpaqueID"], + }) + mountpointMap[grantSpaceID] = space.Id.OpaqueId + } + + matches := matchArray{} + total := int32(0) + for _, space := range listSpacesRes.StorageSpaces { + searchRootID := &searchmsg.ResourceID{ + StorageId: space.Root.StorageId, + SpaceId: space.Root.SpaceId, + OpaqueId: space.Root.OpaqueId, + } + + if req.Ref != nil && + (req.Ref.ResourceId.StorageId != searchRootID.StorageId || + req.Ref.ResourceId.SpaceId != searchRootID.SpaceId || + req.Ref.ResourceId.OpaqueId != searchRootID.OpaqueId) { + continue + } + + var ( + mountpointRootID *searchmsg.ResourceID + rootName string + permissions *provider.ResourcePermissions + ) + mountpointPrefix := "" + switch space.SpaceType { + case "mountpoint": + continue // mountpoint spaces are only "links" to the shared spaces. 
we have to search the shared "grant" space instead + case "grant": + // In case of grant spaces we search the root of the outer space and translate the paths to the according mountpoint + searchRootID.OpaqueId = space.Root.SpaceId + mountpointID, ok := mountpointMap[space.Id.OpaqueId] + if !ok { + s.logger.Warn().Interface("space", space).Msg("could not find mountpoint space for grant space") + continue + } + gpRes, err := s.gateway.GetPath(ctx, &provider.GetPathRequest{ + ResourceId: space.Root, + }) + if err != nil { + s.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Msg("failed to get path for grant space root") + continue + } + if gpRes.Status.Code != rpcv1beta1.Code_CODE_OK { + s.logger.Error().Interface("status", gpRes.Status).Str("space", space.Id.OpaqueId).Msg("failed to get path for grant space root") + continue + } + mountpointPrefix = utils.MakeRelativePath(gpRes.Path) + sid, spid, oid, err := storagespace.SplitID(mountpointID) + if err != nil { + s.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Str("mountpointId", mountpointID).Msg("invalid mountpoint space id") + continue + } + mountpointRootID = &searchmsg.ResourceID{ + StorageId: sid, + SpaceId: spid, + OpaqueId: oid, + } + rootName = space.GetRootInfo().GetPath() + permissions = space.GetRootInfo().GetPermissionSet() + s.logger.Debug().Interface("grantSpace", space).Interface("mountpointRootId", mountpointRootID).Msg("searching a grant") + case "personal": + permissions = space.GetRootInfo().GetPermissionSet() + } + + res, err := s.engine.Search(ctx, &searchsvc.SearchIndexRequest{ + Query: req.Query, + Ref: &searchmsg.Reference{ + ResourceId: searchRootID, + Path: mountpointPrefix, + }, + PageSize: req.PageSize, + }) + if err != nil { + s.logger.Error().Err(err).Str("space", space.Id.OpaqueId).Msg("failed to search the index") + return nil, err + } + s.logger.Debug().Str("space", space.Id.OpaqueId).Int("hits", len(res.Matches)).Msg("space search done") + + total += res.TotalMatches 
+ for _, match := range res.Matches { + if mountpointPrefix != "" { + match.Entity.Ref.Path = utils.MakeRelativePath(strings.TrimPrefix(match.Entity.Ref.Path, mountpointPrefix)) + } + if mountpointRootID != nil { + match.Entity.Ref.ResourceId = mountpointRootID + } + match.Entity.ShareRootName = rootName + + isShared := match.GetEntity().GetRef().GetResourceId().GetSpaceId() == utils.ShareStorageSpaceID + isMountpoint := isShared && match.GetEntity().GetRef().GetPath() == "." + isDir := match.GetEntity().GetMimeType() == "httpd/unix-directory" + match.Entity.Permissions = convertToWebDAVPermissions(isShared, isMountpoint, isDir, permissions) + matches = append(matches, match) + } + } + + // compile one sorted list of matches from all spaces and apply the limit if needed + sort.Sort(matches) + limit := req.PageSize + if limit == 0 { + limit = 200 + } + if int32(len(matches)) > limit && limit != -1 { + matches = matches[0:limit] + } + + return &searchsvc.SearchResponse{ + Matches: matches, + TotalMatches: total, + }, nil +} + +// IndexSpace (re)indexes all resources of a given space. 
+func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId, uID *user.UserId) error { + ownerCtx, err := getAuthContext(&user.User{Id: uID}, s.gateway, s.secret, s.logger) + if err != nil { + return err + } + + rootID, err := storagespace.ParseID(spaceID.OpaqueId) + if err != nil { + s.logger.Error().Err(err).Msg("invalid space id") + return err + } + if rootID.StorageId == "" || rootID.SpaceId == "" { + s.logger.Error().Err(err).Msg("invalid space id") + return fmt.Errorf("invalid space id") + } + rootID.OpaqueId = rootID.SpaceId + + w := walker.NewWalker(s.gateway) + err = w.Walk(ownerCtx, &rootID, func(wd string, info *provider.ResourceInfo, err error) error { + if err != nil { + s.logger.Error().Err(err).Msg("error walking the tree") + return err + } + + if info == nil { + return nil + } + + ref := &provider.Reference{ + Path: utils.MakeRelativePath(filepath.Join(wd, info.Path)), + ResourceId: &rootID, + } + s.logger.Debug().Str("path", ref.Path).Msg("Walking tree") + + searchRes, err := s.engine.Search(ownerCtx, &searchsvc.SearchIndexRequest{ + Query: "+ID:" + storagespace.FormatResourceID(*info.Id) + ` +Mtime:>="` + utils.TSToTime(info.Mtime).Format(time.RFC3339Nano) + `"`, + }) + + if err == nil && len(searchRes.Matches) >= 1 { + if info.Type == provider.ResourceType_RESOURCE_TYPE_CONTAINER { + s.logger.Debug().Str("path", ref.Path).Msg("subtree hasn't changed. Skipping.") + return filepath.SkipDir + } + s.logger.Debug().Str("path", ref.Path).Msg("element hasn't changed. Skipping.") + return nil + } + + s.UpsertItem(ref, uID) + + return nil + }) + + if err != nil { + return err + } + + logDocCount(s.engine, s.logger) + + return nil +} + +// TrashItem marks the item as deleted. 
+func (s *Service) TrashItem(rID *provider.ResourceId) { + err := s.engine.Delete(storagespace.FormatResourceID(*rID)) + if err != nil { + s.logger.Error().Err(err).Interface("Id", rID).Msg("failed to remove item from index") + } +} + +// UpsertItem indexes or stores Resource data fields. +func (s *Service) UpsertItem(ref *provider.Reference, uID *user.UserId) { + ctx, stat, path := s.resInfo(uID, ref) + if ctx == nil || stat == nil || path == "" { + return + } + + doc, err := s.extractor.Extract(ctx, stat.Info) + if err != nil { + s.logger.Error().Err(err).Msg("failed to extract resource content") + return + } + + r := engine.Resource{ + ID: storagespace.FormatResourceID(*stat.Info.Id), + RootID: storagespace.FormatResourceID(provider.ResourceId{ + StorageId: stat.Info.Id.StorageId, + OpaqueId: stat.Info.Id.SpaceId, + SpaceId: stat.Info.Id.SpaceId, + }), + Path: utils.MakeRelativePath(path), + Type: uint64(stat.Info.Type), + Document: doc, + } + r.Hidden = strings.HasPrefix(r.Path, ".") + + if parentID := stat.GetInfo().GetParentId(); parentID != nil { + r.ParentID = storagespace.FormatResourceID(*parentID) + } + + if err = s.engine.Upsert(r.ID, r); err != nil { + s.logger.Error().Err(err).Msg("error adding updating the resource in the index") + } else { + logDocCount(s.engine, s.logger) + } +} + +// RestoreItem makes the item available again. +func (s *Service) RestoreItem(ref *provider.Reference, uID *user.UserId) { + ctx, stat, path := s.resInfo(uID, ref) + if ctx == nil || stat == nil || path == "" { + return + } + + if err := s.engine.Restore(storagespace.FormatResourceID(*stat.Info.Id)); err != nil { + s.logger.Error().Err(err).Msg("failed to restore the changed resource in the index") + } +} + +// MoveItem updates the resource location and all of its necessary fields. 
+func (s *Service) MoveItem(ref *provider.Reference, uID *user.UserId) { + ctx, stat, path := s.resInfo(uID, ref) + if ctx == nil || stat == nil || path == "" { + return + } + + if err := s.engine.Move(storagespace.FormatResourceID(*stat.GetInfo().GetId()), storagespace.FormatResourceID(*stat.GetInfo().GetParentId()), path); err != nil { + s.logger.Error().Err(err).Msg("failed to move the changed resource in the index") + } +} + +func (s *Service) resInfo(uID *user.UserId, ref *provider.Reference) (context.Context, *provider.StatResponse, string) { + ownerCtx, err := getAuthContext(&user.User{Id: uID}, s.gateway, s.secret, s.logger) + if err != nil { + return nil, nil, "" + } + + statRes, err := statResource(ownerCtx, ref, s.gateway, s.logger) + if err != nil { + return nil, nil, "" + } + + r, err := ResolveReference(ownerCtx, ref, statRes.GetInfo(), s.gateway) + if err != nil { + return nil, nil, "" + } + + return ownerCtx, statRes, r.GetPath() +} diff --git a/services/search/pkg/search/provider/searchprovider_test.go b/services/search/pkg/search/service_test.go similarity index 72% rename from services/search/pkg/search/provider/searchprovider_test.go rename to services/search/pkg/search/service_test.go index aac1d7a85f3..fc374c7e16f 100644 --- a/services/search/pkg/search/provider/searchprovider_test.go +++ b/services/search/pkg/search/service_test.go @@ -1,38 +1,36 @@ -package provider_test +package search_test import ( "context" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "github.com/stretchr/testify/mock" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" userv1beta1 "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" sprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" typesv1beta1 "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" "github.com/cs3org/reva/v2/pkg/rgrpc/status" - "github.com/cs3org/reva/v2/pkg/utils" cs3mocks "github.com/cs3org/reva/v2/tests/cs3mocks/mocks" + . 
"github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" "github.com/owncloud/ocis/v2/ocis-pkg/log" searchmsg "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" - "github.com/owncloud/ocis/v2/services/search/pkg/search/mocks" - provider "github.com/owncloud/ocis/v2/services/search/pkg/search/provider" + "github.com/owncloud/ocis/v2/services/search/pkg/config" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + contentMocks "github.com/owncloud/ocis/v2/services/search/pkg/content/mocks" + engineMocks "github.com/owncloud/ocis/v2/services/search/pkg/engine/mocks" + "github.com/owncloud/ocis/v2/services/search/pkg/search" + "github.com/stretchr/testify/mock" ) var _ = Describe("Searchprovider", func() { var ( - p *provider.Provider - gwClient *cs3mocks.GatewayAPIClient - indexClient *mocks.IndexClient - - ctx context.Context - eventsChan chan interface{} - - logger = log.NewLogger() - user = &userv1beta1.User{ + s search.Searcher + extractor *contentMocks.Extractor + gw *cs3mocks.GatewayAPIClient + indexClient *engineMocks.Engine + ctx context.Context + logger = log.NewLogger() + user = &userv1beta1.User{ Id: &userv1beta1.UserId{ OpaqueId: "user", }, @@ -61,29 +59,33 @@ var _ = Describe("Searchprovider", func() { StorageId: "storageid", OpaqueId: "opaqueid", }, + ParentId: &sprovider.ResourceId{ + StorageId: "storageid", + OpaqueId: "parentopaqueid", + }, Path: "foo.pdf", Size: 12345, - Mtime: utils.TimeToTS(time.Now().Add(-time.Hour)), + Mtime: &typesv1beta1.Timestamp{Seconds: 4000}, } ) BeforeEach(func() { ctx = context.Background() - eventsChan = make(chan interface{}) - gwClient = &cs3mocks.GatewayAPIClient{} - indexClient = &mocks.IndexClient{} + gw = &cs3mocks.GatewayAPIClient{} + indexClient = &engineMocks.Engine{} + extractor = &contentMocks.Extractor{} - p = provider.New(gwClient, indexClient, "", eventsChan, 1000, logger) + s = search.NewService(gw, 
indexClient, extractor, logger, &config.Config{}) - gwClient.On("Authenticate", mock.Anything, mock.Anything).Return(&gateway.AuthenticateResponse{ + gw.On("Authenticate", mock.Anything, mock.Anything).Return(&gateway.AuthenticateResponse{ Status: status.NewOK(ctx), Token: "authtoken", }, nil) - gwClient.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{ + gw.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{ Status: status.NewOK(context.Background()), Info: ri, }, nil) - gwClient.On("GetPath", mock.Anything, mock.MatchedBy(func(req *sprovider.GetPathRequest) bool { + gw.On("GetPath", mock.Anything, mock.MatchedBy(func(req *sprovider.GetPathRequest) bool { return req.ResourceId.OpaqueId == ri.Id.OpaqueId })).Return(&sprovider.GetPathResponse{ Status: status.NewOK(context.Background()), @@ -94,34 +96,29 @@ var _ = Describe("Searchprovider", func() { Describe("New", func() { It("returns a new instance", func() { - p := provider.New(gwClient, indexClient, "", eventsChan, 1000, logger) - Expect(p).ToNot(BeNil()) + s := search.NewService(gw, indexClient, extractor, logger, &config.Config{}) + Expect(s).ToNot(BeNil()) }) }) Describe("IndexSpace", func() { It("walks the space and indexes all files", func() { - gwClient.On("GetUserByClaim", mock.Anything, mock.Anything).Return(&userv1beta1.GetUserByClaimResponse{ + gw.On("GetUserByClaim", mock.Anything, mock.Anything).Return(&userv1beta1.GetUserByClaimResponse{ Status: status.NewOK(context.Background()), User: user, }, nil) - indexClient.On("Add", mock.Anything, mock.MatchedBy(func(riToIndex *sprovider.ResourceInfo) bool { - return riToIndex.Id.OpaqueId == ri.Id.OpaqueId - })).Return(nil) + extractor.On("Extract", mock.Anything, mock.Anything, mock.Anything).Return(content.Document{}, nil) + indexClient.On("Upsert", mock.Anything, mock.Anything).Return(nil) indexClient.On("Search", mock.Anything, mock.Anything).Return(&searchsvc.SearchIndexResponse{}, nil) - res, err := 
p.IndexSpace(ctx, &searchsvc.IndexSpaceRequest{ - SpaceId: "storageid$spaceid!spaceid", - UserId: "user", - }) - Expect(err).ToNot(HaveOccurred()) - Expect(res).ToNot(BeNil()) + err := s.IndexSpace(&sprovider.StorageSpaceId{OpaqueId: "storageid$spaceid!spaceid"}, user.Id) + Expect(err).ShouldNot(HaveOccurred()) }) }) Describe("Search", func() { It("fails when an empty query is given", func() { - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "", }) Expect(err).To(HaveOccurred()) @@ -130,7 +127,7 @@ var _ = Describe("Searchprovider", func() { Context("with a personal space", func() { BeforeEach(func() { - gwClient.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ + gw.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ Status: status.NewOK(ctx), StorageSpaces: []*sprovider.StorageSpace{personalSpace}, }, nil) @@ -159,52 +156,18 @@ var _ = Describe("Searchprovider", func() { }, nil) }) - It("lowercases the filename", func() { - p.Search(ctx, &searchsvc.SearchRequest{ - Query: "Foo.pdf", - }) - indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { - return req.Query == "Name:*foo.pdf*" - })) - }) - It("does not mess with field-based searches", func() { - p.Search(ctx, &searchsvc.SearchRequest{ + _, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "Size:<10", }) + Expect(err).ToNot(HaveOccurred()) indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { return req.Query == "Size:<10" })) }) - It("uppercases field names", func() { - tests := []struct { - Original string - Expected string - }{ - {Original: "size:<100", Expected: "Size:<100"}, - } - for _, test := range tests { - p.Search(ctx, &searchsvc.SearchRequest{ - Query: test.Original, - }) - 
indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { - return req.Query == test.Expected - })) - } - }) - - It("escapes special characters", func() { - p.Search(ctx, &searchsvc.SearchRequest{ - Query: "Foo oo.pdf", - }) - indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { - return req.Query == `Name:*foo\ oo.pdf*` - })) - }) - It("searches the personal user space", func() { - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "foo", }) Expect(err).ToNot(HaveOccurred()) @@ -216,10 +179,6 @@ var _ = Describe("Searchprovider", func() { Expect(match.Entity.Name).To(Equal("Foo.pdf")) Expect(match.Entity.Ref.ResourceId.OpaqueId).To(Equal(personalSpace.Root.OpaqueId)) Expect(match.Entity.Ref.Path).To(Equal("./path/to/Foo.pdf")) - - indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { - return req.Query == "Name:*foo*" && req.Ref.ResourceId.OpaqueId == personalSpace.Root.OpaqueId && req.Ref.Path == "" - })) }) }) @@ -251,14 +210,14 @@ var _ = Describe("Searchprovider", func() { }, }, } - gwClient.On("GetPath", mock.Anything, mock.Anything).Return(&sprovider.GetPathResponse{ + gw.On("GetPath", mock.Anything, mock.Anything).Return(&sprovider.GetPathResponse{ Status: status.NewOK(ctx), Path: "/grant/path", }, nil) }) It("searches the received spaces", func() { - gwClient.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ + gw.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ Status: status.NewOK(ctx), StorageSpaces: []*sprovider.StorageSpace{grantSpace, mountpointSpace}, }, nil) @@ -285,7 +244,7 @@ var _ = Describe("Searchprovider", func() { }, }, nil) - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, 
err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "Foo", }) Expect(err).ToNot(HaveOccurred()) @@ -296,15 +255,11 @@ var _ = Describe("Searchprovider", func() { Expect(match.Entity.Name).To(Equal("Shared.pdf")) Expect(match.Entity.Ref.ResourceId.OpaqueId).To(Equal(mountpointSpace.Root.OpaqueId)) Expect(match.Entity.Ref.Path).To(Equal("./to/Shared.pdf")) - - indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { - return req.Query == "Name:*foo*" && req.Ref.ResourceId.StorageId == grantSpace.Root.StorageId && req.Ref.Path == "./grant/path" - })) }) Context("when searching both spaces", func() { BeforeEach(func() { - gwClient.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ + gw.On("ListStorageSpaces", mock.Anything, mock.Anything).Return(&sprovider.ListStorageSpacesResponse{ Status: status.NewOK(ctx), StorageSpaces: []*sprovider.StorageSpace{personalSpace, grantSpace, mountpointSpace}, }, nil) @@ -381,7 +336,7 @@ var _ = Describe("Searchprovider", func() { }) It("considers the search Ref parameter", func() { - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "foo", Ref: &searchmsg.Reference{ ResourceId: &searchmsg.ResourceID{ @@ -398,7 +353,7 @@ var _ = Describe("Searchprovider", func() { }) It("finds matches in both the personal space AND the grant", func() { - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "foo", }) Expect(err).ToNot(HaveOccurred()) @@ -409,7 +364,7 @@ var _ = Describe("Searchprovider", func() { }) It("sorts and limits the combined results from all spaces", func() { - res, err := p.Search(ctx, &searchsvc.SearchRequest{ + res, err := s.Search(ctx, &searchsvc.SearchRequest{ Query: "foo", PageSize: 2, }) diff --git a/services/search/pkg/server/grpc/option.go b/services/search/pkg/server/grpc/option.go 
index 1aec67b0033..577a862d6af 100644 --- a/services/search/pkg/server/grpc/option.go +++ b/services/search/pkg/server/grpc/option.go @@ -6,7 +6,7 @@ import ( "github.com/owncloud/ocis/v2/ocis-pkg/log" "github.com/owncloud/ocis/v2/services/search/pkg/config" "github.com/owncloud/ocis/v2/services/search/pkg/metrics" - svc "github.com/owncloud/ocis/v2/services/search/pkg/service/v0" + svc "github.com/owncloud/ocis/v2/services/search/pkg/service/grpc/v0" "github.com/urfave/cli/v2" ) diff --git a/services/search/pkg/server/grpc/server.go b/services/search/pkg/server/grpc/server.go index c152d47ef28..da3c14f1977 100644 --- a/services/search/pkg/server/grpc/server.go +++ b/services/search/pkg/server/grpc/server.go @@ -4,11 +4,11 @@ import ( "github.com/owncloud/ocis/v2/ocis-pkg/service/grpc" "github.com/owncloud/ocis/v2/ocis-pkg/version" searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" - svc "github.com/owncloud/ocis/v2/services/search/pkg/service/v0" + svc "github.com/owncloud/ocis/v2/services/search/pkg/service/grpc/v0" ) // Server initializes a new go-micro service ready to run -func Server(opts ...Option) grpc.Service { +func Server(opts ...Option) (grpc.Service, func(), error) { options := newOptions(opts...) service, err := grpc.NewService( @@ -27,10 +27,10 @@ func Server(opts ...Option) grpc.Service { ) if err != nil { options.Logger.Fatal().Err(err).Msg("Error creating search service") - return grpc.Service{} + return grpc.Service{}, func() {}, err } - handle, err := svc.NewHandler( + handle, teardown, err := svc.NewHandler( svc.Config(options.Config), svc.Logger(options.Logger), ) @@ -38,11 +38,18 @@ func Server(opts ...Option) grpc.Service { options.Logger.Error(). Err(err). 
Msg("Error initializing search service") - return grpc.Service{} + return grpc.Service{}, teardown, err } - _ = searchsvc.RegisterSearchProviderHandler( + + if err := searchsvc.RegisterSearchProviderHandler( service.Server(), handle, - ) - return service + ); err != nil { + options.Logger.Error(). + Err(err). + Msg("Error registering search provider handler") + return grpc.Service{}, teardown, err + } + + return service, teardown, nil } diff --git a/services/search/pkg/service/v0/option.go b/services/search/pkg/service/grpc/v0/option.go similarity index 100% rename from services/search/pkg/service/v0/option.go rename to services/search/pkg/service/grpc/v0/option.go diff --git a/services/search/pkg/service/grpc/v0/service.go b/services/search/pkg/service/grpc/v0/service.go new file mode 100644 index 00000000000..712896f549a --- /dev/null +++ b/services/search/pkg/service/grpc/v0/service.go @@ -0,0 +1,200 @@ +package service + +import ( + "context" + "crypto/tls" + "crypto/x509" + "errors" + "fmt" + "os" + "time" + + user "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" + provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + ctxpkg "github.com/cs3org/reva/v2/pkg/ctx" + revactx "github.com/cs3org/reva/v2/pkg/ctx" + "github.com/cs3org/reva/v2/pkg/errtypes" + "github.com/cs3org/reva/v2/pkg/events/server" + "github.com/cs3org/reva/v2/pkg/rgrpc/todo/pool" + "github.com/go-micro/plugins/v4/events/natsjs" + "github.com/jellydator/ttlcache/v2" + ociscrypto "github.com/owncloud/ocis/v2/ocis-pkg/crypto" + "github.com/owncloud/ocis/v2/ocis-pkg/log" + v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0" + searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/content" + "github.com/owncloud/ocis/v2/services/search/pkg/engine" + "github.com/owncloud/ocis/v2/services/search/pkg/search" + merrors "go-micro.dev/v4/errors" + "go-micro.dev/v4/metadata" + grpcmetadata 
"google.golang.org/grpc/metadata" +) + +// NewHandler returns a service implementation for Service. +func NewHandler(opts ...Option) (searchsvc.SearchProviderHandler, func(), error) { + teardown := func() {} + options := newOptions(opts...) + logger := options.Logger + cfg := options.Config + + // initialize search engine + var eng engine.Engine + switch cfg.Engine.Type { + case "bleve": + idx, err := engine.NewBleveIndex(cfg.Engine.Bleve.Datapath) + if err != nil { + return nil, teardown, err + } + + teardown = func() { + _ = idx.Close() + } + + eng = engine.NewBleveEngine(idx) + default: + return nil, teardown, fmt.Errorf("unknown search engine: %s", cfg.Engine.Type) + } + + // initialize gateway + gw, err := pool.GetGatewayServiceClient(cfg.Reva.Address) + if err != nil { + logger.Fatal().Err(err).Str("addr", cfg.Reva.Address).Msg("could not get reva client") + return nil, teardown, err + } + // initialize search content extractor + var extractor content.Extractor + switch cfg.Extractor.Type { + case "basic": + if extractor, err = content.NewBasicExtractor(logger); err != nil { + return nil, teardown, err + } + case "tika": + if extractor, err = content.NewTikaExtractor(gw, logger, cfg); err != nil { + return nil, teardown, err + } + default: + return nil, teardown, fmt.Errorf("unknown search extractor: %s", cfg.Extractor.Type) + } + + var tlsConf *tls.Config + if cfg.Events.EnableTLS { + var rootCAPool *x509.CertPool + if cfg.Events.TLSRootCACertificate != "" { + rootCrtFile, err := os.Open(cfg.Events.TLSRootCACertificate) + if err != nil { + return nil, teardown, err + } + + rootCAPool, err = ociscrypto.NewCertPoolFromPEM(rootCrtFile) + if err != nil { + return nil, teardown, err + } + cfg.Events.TLSInsecure = false + } + + tlsConf = &tls.Config{ + MinVersion: tls.VersionTLS12, + InsecureSkipVerify: cfg.Events.TLSInsecure, //nolint:gosec + RootCAs: rootCAPool, + } + } + bus, err := server.NewNatsStream( + natsjs.TLSConfig(tlsConf), + 
natsjs.Address(cfg.Events.Endpoint), + natsjs.ClusterID(cfg.Events.Cluster), + ) + if err != nil { + return nil, teardown, err + } + + ss := search.NewService(gw, eng, extractor, logger, cfg) + + // setup event handling + if err := search.HandleEvents(ss, bus, logger, cfg); err != nil { + return nil, teardown, err + } + + cache := ttlcache.NewCache() + if err := cache.SetTTL(time.Second); err != nil { + return nil, teardown, err + } + + return &Service{ + id: cfg.GRPC.Namespace + "." + cfg.Service.Name, + log: logger, + searcher: ss, + cache: cache, + }, teardown, nil +} + +// Service implements the searchServiceHandler interface +type Service struct { + id string + log log.Logger + searcher search.Searcher + cache *ttlcache.Cache +} + +// Search handles the search +func (s Service) Search(ctx context.Context, in *searchsvc.SearchRequest, out *searchsvc.SearchResponse) error { + // Get token from the context (go-micro) and make it known to the reva client too (grpc) + t, ok := metadata.Get(ctx, revactx.TokenHeader) + if !ok { + s.log.Error().Msg("Could not get token from context") + return errors.New("could not get token from context") + } + ctx = grpcmetadata.AppendToOutgoingContext(ctx, revactx.TokenHeader, t) + + u, _ := ctxpkg.ContextGetUser(ctx) + key := cacheKey(in.Query, in.PageSize, in.Ref, u) + res, ok := s.FromCache(key) + if !ok { + var err error + res, err = s.searcher.Search(ctx, &searchsvc.SearchRequest{ + Query: in.Query, + PageSize: in.PageSize, + Ref: in.Ref, + }) + if err != nil { + switch err.(type) { + case errtypes.BadRequest: + return merrors.BadRequest(s.id, err.Error()) + default: + return merrors.InternalServerError(s.id, err.Error()) + } + } + + s.Cache(key, res) + } + + out.Matches = res.Matches + out.TotalMatches = res.TotalMatches + out.NextPageToken = res.NextPageToken + return nil +} + +// IndexSpace (re)indexes all resources of a given space. 
+func (s Service) IndexSpace(ctx context.Context, in *searchsvc.IndexSpaceRequest, _ *searchsvc.IndexSpaceResponse) error { + return s.searcher.IndexSpace(&provider.StorageSpaceId{OpaqueId: in.SpaceId}, &user.UserId{OpaqueId: in.UserId}) +} + +// FromCache pulls a search result from cache +func (s Service) FromCache(key string) (*searchsvc.SearchResponse, bool) { + v, err := s.cache.Get(key) + if err != nil { + return nil, false + } + + sr, ok := v.(*searchsvc.SearchResponse) + return sr, ok +} + +// Cache caches the search result +func (s Service) Cache(key string, res *searchsvc.SearchResponse) { + // lets ignore the error + _ = s.cache.Set(key, res) +} + +func cacheKey(query string, pagesize int32, ref *v0.Reference, user *user.User) string { + return fmt.Sprintf("%s|%d|%s$%s!%s/%s|%s", query, pagesize, ref.GetResourceId().GetStorageId(), ref.GetResourceId().GetSpaceId(), ref.GetResourceId().GetOpaqueId(), ref.GetPath(), user.GetId().GetOpaqueId()) +} diff --git a/services/search/pkg/service/v0/service.go b/services/search/pkg/service/v0/service.go deleted file mode 100644 index ee1c282eb25..00000000000 --- a/services/search/pkg/service/v0/service.go +++ /dev/null @@ -1,147 +0,0 @@ -package service - -import ( - "context" - "crypto/tls" - "crypto/x509" - "errors" - "os" - "path/filepath" - - "github.com/blevesearch/bleve/v2" - revactx "github.com/cs3org/reva/v2/pkg/ctx" - "github.com/cs3org/reva/v2/pkg/errtypes" - "github.com/cs3org/reva/v2/pkg/events" - "github.com/cs3org/reva/v2/pkg/events/server" - "github.com/cs3org/reva/v2/pkg/rgrpc/todo/pool" - "github.com/go-micro/plugins/v4/events/natsjs" - merrors "go-micro.dev/v4/errors" - "go-micro.dev/v4/metadata" - grpcmetadata "google.golang.org/grpc/metadata" - - ociscrypto "github.com/owncloud/ocis/v2/ocis-pkg/crypto" - "github.com/owncloud/ocis/v2/ocis-pkg/log" - searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" - "github.com/owncloud/ocis/v2/services/search/pkg/config" - 
"github.com/owncloud/ocis/v2/services/search/pkg/search" - "github.com/owncloud/ocis/v2/services/search/pkg/search/index" - searchprovider "github.com/owncloud/ocis/v2/services/search/pkg/search/provider" -) - -// NewHandler returns a service implementation for Service. -func NewHandler(opts ...Option) (searchsvc.SearchProviderHandler, error) { - options := newOptions(opts...) - logger := options.Logger - cfg := options.Config - - // Connect to nats to listen for changes that need to trigger an index update - evtsCfg := cfg.Events - - var tlsConf *tls.Config - if evtsCfg.EnableTLS { - var rootCAPool *x509.CertPool - if evtsCfg.TLSRootCACertificate != "" { - rootCrtFile, err := os.Open(evtsCfg.TLSRootCACertificate) - if err != nil { - return nil, err - } - - rootCAPool, err = ociscrypto.NewCertPoolFromPEM(rootCrtFile) - if err != nil { - return nil, err - } - evtsCfg.TLSInsecure = false - } - - tlsConf = &tls.Config{ - MinVersion: tls.VersionTLS12, - InsecureSkipVerify: evtsCfg.TLSInsecure, //nolint:gosec - RootCAs: rootCAPool, - } - } - client, err := server.NewNatsStream( - natsjs.TLSConfig(tlsConf), - natsjs.Address(evtsCfg.Endpoint), - natsjs.ClusterID(evtsCfg.Cluster), - ) - if err != nil { - return nil, err - } - evts, err := events.Consume(client, evtsCfg.ConsumerGroup, searchprovider.ListenEvents...) - if err != nil { - return nil, err - } - - indexDir := filepath.Join(cfg.Datapath, "index.bleve") - bleveIndex, err := bleve.Open(indexDir) - if err != nil { - mapping, err := index.BuildMapping() - if err != nil { - return nil, err - } - bleveIndex, err = bleve.New(indexDir, mapping) - if err != nil { - return nil, err - } - } - index, err := index.New(bleveIndex) - if err != nil { - return nil, err - } - - gwclient, err := pool.GetGatewayServiceClient(cfg.Reva.Address, cfg.Reva.GetRevaOptions()...) 
- if err != nil { - logger.Fatal().Err(err).Str("addr", cfg.Reva.Address).Msg("could not get reva client") - } - - provider := searchprovider.New(gwclient, index, cfg.MachineAuthAPIKey, evts, cfg.DebounceDuration, logger) - - return &Service{ - id: cfg.GRPC.Namespace + "." + cfg.Service.Name, - log: logger, - Config: cfg, - provider: provider, - }, nil -} - -// Service implements the searchServiceHandler interface -type Service struct { - id string - log log.Logger - Config *config.Config - provider search.ProviderClient -} - -func (s Service) Search(ctx context.Context, in *searchsvc.SearchRequest, out *searchsvc.SearchResponse) error { - // Get token from the context (go-micro) and make it known to the reva client too (grpc) - t, ok := metadata.Get(ctx, revactx.TokenHeader) - if !ok { - s.log.Error().Msg("Could not get token from context") - return errors.New("could not get token from context") - } - ctx = grpcmetadata.AppendToOutgoingContext(ctx, revactx.TokenHeader, t) - - res, err := s.provider.Search(ctx, &searchsvc.SearchRequest{ - Query: in.Query, - PageSize: in.PageSize, - Ref: in.Ref, - }) - if err != nil { - switch err.(type) { - case errtypes.BadRequest: - return merrors.BadRequest(s.id, err.Error()) - default: - return merrors.InternalServerError(s.id, err.Error()) - } - } - - out.Matches = res.Matches - out.TotalMatches = res.TotalMatches - out.NextPageToken = res.NextPageToken - return nil -} - -func (s Service) IndexSpace(ctx context.Context, in *searchsvc.IndexSpaceRequest, out *searchsvc.IndexSpaceResponse) error { - _, err := s.provider.IndexSpace(ctx, in) - return err -}