From 5435f02451b6adf57781ee19225a0f911f1eca6d Mon Sep 17 00:00:00 2001 From: SimFG <1142838399@qq.com> Date: Tue, 26 Sep 2023 11:04:12 +0800 Subject: [PATCH] cdc with ts --- .gitignore | 3 +- Makefile | 6 + core/api/data_handler.go | 161 ++ core/api/message_manager.go | 12 + core/api/meta_op.go | 39 + core/api/reader.go | 26 + core/api/replicate_manager.go | 41 + core/api/writer.go | 13 + core/config/mq.go | 5 + core/go.mod | 11 +- core/go.sum | 14 +- core/main/with_ts.go | 14 +- core/mocks/cdc_reader_mock.go | 58 - core/mocks/cdc_writer_mock.go | 57 - core/mocks/factory_creator_mock.go | 159 -- core/mocks/milvus_client_api_mock.go | 248 --- core/mocks/milvus_client_factory_mock.go | 116 -- core/mocks/monitor_mock.go | 69 - core/mocks/write_callbakc_mock.go | 47 - core/model/reader.go | 33 + core/reader/channel_reader.go | 7 +- core/reader/collection_reader.go | 721 +++++++++ core/reader/collection_reader_test.go | 589 +++++++ core/reader/config_option.go | 235 +-- core/reader/data_barrier.go | 8 +- core/reader/etcd_op.go | 379 +++++ core/reader/factory_api.go | 27 +- core/reader/milvus_reader.go | 633 -------- core/reader/milvus_reader_test.go | 592 ------- core/reader/monitor.go | 3 - core/reader/reader_api.go | 52 +- core/reader/replicate_channel_manager.go | 414 +++++ core/reader/target_client.go | 67 + core/util/etcd.go | 24 +- core/util/log.go | 29 +- core/util/msg.go | 16 +- core/util/retry.go | 3 +- core/util/string.go | 3 +- core/writer/channel_writer.go | 105 ++ core/writer/config_option.go | 76 +- core/writer/handler.go | 309 ++-- core/writer/milvus_api.go | 109 +- core/writer/milvus_handler.go | 70 +- core/writer/milvus_handler_test.go | 335 ++-- core/writer/msg_size.go | 16 +- core/writer/replicate_message_manager.go | 96 ++ core/writer/writer_api.go | 135 +- core/writer/writer_template.go | 1797 +++++++++++----------- core/writer/writer_template_test.go | 1058 ++++++------- server/cdc_api.go | 15 +- server/cdc_impl.go | 529 ++++--- server/cdc_impl_test.go | 3 +- server/cdc_task.go | 524 ++++--- server/cdc_task_test.go | 7 +- server/configs/cdc.yaml | 3 +- server/data_handler_wrapper.go | 256 +-- server/go.mod | 13 +- server/go.sum | 17 +- server/handle_map.go | 14 +- server/handle_map_test.go | 1 + server/main/main.go | 11 +- server/metrics/metrics_task_num.go | 7 +- server/mocks/cdc_factory.go | 175 +++ server/mocks/cdc_factory_mock.go | 88 -- server/mocks/cdc_service.go | 391 +++++ server/mocks/meta_store.go | 180 +++ server/mocks/meta_store_factory.go | 190 +++ server/mocks/meta_store_factory_mock.go | 99 -- server/mocks/meta_store_mock.go | 83 - server/model/meta/task.go | 1 + server/model/meta/task_test.go | 1 + server/monitor.go | 6 +- server/server.go | 10 +- server/server_test.go | 1 + server/store/etcd.go | 15 +- server/store/meta_op.go | 4 +- server/store/meta_store.go | 3 - server/store/meta_store_test.go | 5 +- server/store/mysql.go | 10 +- server/var.go | 14 +- server/writer_callback.go | 65 +- 81 files changed, 6634 insertions(+), 5147 deletions(-) create mode 100644 core/api/data_handler.go create mode 100644 core/api/message_manager.go create mode 100644 core/api/meta_op.go create mode 100644 core/api/reader.go create mode 100644 core/api/replicate_manager.go create mode 100644 core/api/writer.go delete mode 100644 core/mocks/cdc_reader_mock.go delete mode 100644 core/mocks/cdc_writer_mock.go delete mode 100644 core/mocks/factory_creator_mock.go delete mode 100644 core/mocks/milvus_client_api_mock.go delete mode 100644 core/mocks/milvus_client_factory_mock.go 
delete mode 100644 core/mocks/monitor_mock.go delete mode 100644 core/mocks/write_callbakc_mock.go create mode 100644 core/model/reader.go create mode 100644 core/reader/collection_reader.go create mode 100644 core/reader/collection_reader_test.go create mode 100644 core/reader/etcd_op.go delete mode 100644 core/reader/milvus_reader.go delete mode 100644 core/reader/milvus_reader_test.go create mode 100644 core/reader/replicate_channel_manager.go create mode 100644 core/reader/target_client.go create mode 100644 core/writer/channel_writer.go create mode 100644 core/writer/replicate_message_manager.go create mode 100644 server/mocks/cdc_factory.go delete mode 100644 server/mocks/cdc_factory_mock.go create mode 100644 server/mocks/cdc_service.go create mode 100644 server/mocks/meta_store.go create mode 100644 server/mocks/meta_store_factory.go delete mode 100644 server/mocks/meta_store_factory_mock.go delete mode 100644 server/mocks/meta_store_mock.go diff --git a/.gitignore b/.gitignore index fee1456e..d8acf098 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ **/__pycache__/* **/.pytest_cache/* server/cdc -**/milvus-cdc-build/* \ No newline at end of file +**/milvus-cdc-build/* +*.log \ No newline at end of file diff --git a/Makefile b/Makefile index 8420fbc8..8a53067a 100644 --- a/Makefile +++ b/Makefile @@ -7,3 +7,9 @@ test-go: static-check: @echo "Running go-lint check:" @(env bash $(PWD)/scripts/run_go_lint.sh) + +# TODO use the array to generate the name list +generate-mockery: + @echo "Generating mockery server mocks..." + @cd "$(PWD)/server"; mockery -r --name "CDCService|CDCFactory|MetaStore|MetaStoreFactory" --output ./mocks --case snake --with-expecter + @cd "$(PWD)/core"; mockery -r --name "CDCReader|CDCWriter|FactoryCreator|Monitor|WriteCallback|MilvusClientFactory|MilvusClientAPI|ChannelManager|TargetAPI|MetaOp" --output ./mocks --case snake --with-expecter \ No newline at end of file diff --git a/core/api/data_handler.go b/core/api/data_handler.go new file mode 100644 index 00000000..d8172556 --- /dev/null +++ b/core/api/data_handler.go @@ -0,0 +1,161 @@ +package api + +import ( + "context" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus-sdk-go/v2/entity" + "github.com/milvus-io/milvus/pkg/log" +) + +type DataHandler interface { + CreateCollection(ctx context.Context, param *CreateCollectionParam) error + DropCollection(ctx context.Context, param *DropCollectionParam) error + Insert(ctx context.Context, param *InsertParam) error + Delete(ctx context.Context, param *DeleteParam) error + CreatePartition(ctx context.Context, param *CreatePartitionParam) error + DropPartition(ctx context.Context, param *DropPartitionParam) error + + CreateIndex(ctx context.Context, param *CreateIndexParam) error + DropIndex(ctx context.Context, param *DropIndexParam) error + LoadCollection(ctx context.Context, param *LoadCollectionParam) error + ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error + CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error + DropDatabase(ctx context.Context, param *DropDataBaseParam) error + + ReplicateMessage(ctx context.Context, param *ReplicateMessageParam) error + // NOTE: please add the implements for the DataHandlerWrapper class when adding new interfaces +} + +type DefaultDataHandler struct{} + +func (d *DefaultDataHandler) CreateCollection(ctx context.Context, 
param *CreateCollectionParam) error { + log.Warn("CreateCollection is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) DropCollection(ctx context.Context, param *DropCollectionParam) error { + log.Warn("DropCollection is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) Insert(ctx context.Context, param *InsertParam) error { + log.Warn("Insert is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) Delete(ctx context.Context, param *DeleteParam) error { + log.Warn("Delete is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) CreatePartition(ctx context.Context, param *CreatePartitionParam) error { + log.Warn("CreatePartition is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) DropPartition(ctx context.Context, param *DropPartitionParam) error { + log.Warn("DropPartition is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) CreateIndex(ctx context.Context, param *CreateIndexParam) error { + log.Warn("CreateIndex is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) DropIndex(ctx context.Context, param *DropIndexParam) error { + log.Warn("DropIndex is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) LoadCollection(ctx context.Context, param *LoadCollectionParam) error { + log.Warn("LoadCollection is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error { + log.Warn("ReleaseCollection is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error { + log.Warn("CreateDatabase is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) DropDatabase(ctx context.Context, param *DropDataBaseParam) error { + log.Warn("DropDatabase is not implemented, please check it") + return nil +} + +func (d *DefaultDataHandler) ReplicateMessage(ctx context.Context, param *ReplicateMessageParam) error { + log.Warn("ReplicateMessage is not implemented, please check it") + return nil +} + +type CreateCollectionParam struct { + Schema *entity.Schema + ShardsNum int32 + ConsistencyLevel commonpb.ConsistencyLevel + Properties []*commonpb.KeyValuePair +} + +type DropCollectionParam struct { + CollectionName string +} + +type InsertParam struct { + CollectionName string + PartitionName string + Columns []entity.Column +} + +type DeleteParam struct { + CollectionName string + PartitionName string + Column entity.Column +} + +type CreatePartitionParam struct { + CollectionName string + PartitionName string +} + +type DropPartitionParam struct { + CollectionName string + PartitionName string +} + +type CreateIndexParam struct { + milvuspb.CreateIndexRequest +} + +type DropIndexParam struct { + milvuspb.DropIndexRequest +} + +type LoadCollectionParam struct { + milvuspb.LoadCollectionRequest +} + +type ReleaseCollectionParam struct { + milvuspb.ReleaseCollectionRequest +} + +type CreateDataBaseParam struct { + milvuspb.CreateDatabaseRequest +} + +type DropDataBaseParam struct { + milvuspb.DropDatabaseRequest +} + +type ReplicateMessageParam struct { + ChannelName string + BeginTs, EndTs uint64 + MsgsBytes [][]byte + StartPositions, EndPositions []*msgpb.MsgPosition +} diff --git a/core/api/message_manager.go b/core/api/message_manager.go new file mode
100644 index 00000000..011fc866 --- /dev/null +++ b/core/api/message_manager.go @@ -0,0 +1,12 @@ +package api + +type MessageManager interface { + ReplicateMessage(message *ReplicateMessage) + Close(channelName string) +} + +type ReplicateMessage struct { + Param *ReplicateMessageParam + SuccessFunc func(param *ReplicateMessageParam) + FailFunc func(param *ReplicateMessageParam, err error) +} diff --git a/core/api/meta_op.go b/core/api/meta_op.go new file mode 100644 index 00000000..dc4af131 --- /dev/null +++ b/core/api/meta_op.go @@ -0,0 +1,39 @@ +package api + +import ( + "context" + + "github.com/zilliztech/milvus-cdc/core/pb" +) + +// MetaOp meta operation +type MetaOp interface { + // WatchCollection the implementation should make sure it's only called once. The same applies to WatchPartition + WatchCollection(ctx context.Context, filter CollectionFilter) + WatchPartition(ctx context.Context, filter PartitionFilter) + + // SubscribeCollectionEvent an event is only consumed once. The same applies to SubscribePartitionEvent + // TODO need to consider multiple targets, maybe let one meta op correspond to one target + SubscribeCollectionEvent(taskID string, consumer CollectionEventConsumer) + SubscribePartitionEvent(taskID string, consumer PartitionEventConsumer) + UnsubscribeEvent(taskID string, eventType WatchEventType) + + GetAllCollection(ctx context.Context, filter CollectionFilter) ([]*pb.CollectionInfo, error) + GetCollectionNameByID(ctx context.Context, id int64) string +} + +// CollectionFilter the filter is applied before the collection is filled with the schema info +type CollectionFilter func(*pb.CollectionInfo) bool + +type PartitionFilter func(info *pb.PartitionInfo) bool + +type CollectionEventConsumer CollectionFilter + +type PartitionEventConsumer PartitionFilter + +type WatchEventType int + +const ( + CollectionEventType WatchEventType = iota + 1 + PartitionEventType +) diff --git a/core/api/reader.go b/core/api/reader.go new file mode 100644 index 00000000..aeaf2052 --- /dev/null +++ b/core/api/reader.go @@ -0,0 +1,26 @@ +package api + +import ( + "context" + + "github.com/milvus-io/milvus/pkg/log" +) + +type Reader interface { + StartRead(ctx context.Context) + QuitRead(ctx context.Context) + GetChannelChan() <-chan string +} + +// DefaultReader all CDCReader implementations should embed it +type DefaultReader struct{} + +// StartRead the return value is nil, +// and receiving from a nil chan blocks forever instead of panicking +func (d *DefaultReader) StartRead(ctx context.Context) { + log.Warn("StartRead is not implemented, please check it") +} + +func (d *DefaultReader) QuitRead(ctx context.Context) { + log.Warn("QuitRead is not implemented, please check it") +} diff --git a/core/api/replicate_manager.go b/core/api/replicate_manager.go new file mode 100644 index 00000000..0ca15ada --- /dev/null +++ b/core/api/replicate_manager.go @@ -0,0 +1,41 @@ +package api + +import ( + "context" + + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/pkg/mq/msgstream" + + "github.com/zilliztech/milvus-cdc/core/model" + "github.com/zilliztech/milvus-cdc/core/pb" +) + +// ChannelManager every target must be guaranteed a manager +type ChannelManager interface { + StartReadCollection(ctx context.Context, info *pb.CollectionInfo, seekPositions []*msgpb.MsgPosition) error + + StopReadCollection(ctx context.Context, info *pb.CollectionInfo) error + + GetChannelChan() <-chan string + GetMsgChan(pChannel string) <-chan *msgstream.MsgPack + GetEventChan() <-chan
*ReplicateAPIEvent +} + +type TargetAPI interface { + GetCollectionInfo(ctx context.Context, collectionName string) (*model.CollectionInfo, error) +} + +type ReplicateAPIEvent struct { + EventType ReplicateAPIEventType + CollectionInfo *pb.CollectionInfo + PartitionInfo *pb.PartitionInfo +} + +type ReplicateAPIEventType int + +const ( + ReplicateCreateCollection = iota + 1 + ReplicateDropCollection + ReplicateCreatePartition + ReplicateDropPartition +) diff --git a/core/api/writer.go b/core/api/writer.go new file mode 100644 index 00000000..4239da95 --- /dev/null +++ b/core/api/writer.go @@ -0,0 +1,13 @@ +package api + +import ( + "context" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/pkg/mq/msgstream" +) + +type Writer interface { + HandleReplicateAPIEvent(ctx context.Context, apiEvent *ReplicateAPIEvent) error + HandleReplicateMessage(ctx context.Context, channelName string, msgPack *msgstream.MsgPack) (*commonpb.KeyDataPair, error) +} diff --git a/core/config/mq.go b/core/config/mq.go index fe35d827..d4418a32 100644 --- a/core/config/mq.go +++ b/core/config/mq.go @@ -44,6 +44,11 @@ func NewParamGroup() paramtable.ParamGroup { return group } +type MQConfig struct { + Pulsar PulsarConfig + Kafka KafkaConfig +} + type KafkaConfig struct { Address string } diff --git a/core/go.mod b/core/go.mod index ef2a5e37..b3baec77 100644 --- a/core/go.mod +++ b/core/go.mod @@ -4,16 +4,15 @@ go 1.18 require ( github.com/cockroachdb/errors v1.9.1 - github.com/goccy/go-json v0.10.2 github.com/golang/protobuf v1.5.3 - github.com/milvus-io/milvus-proto/go-api/v2 v2.3.1-0.20230911111453-720fcfb1a048 + github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab github.com/milvus-io/milvus-sdk-go/v2 v2.2.1-0.20230814034926-dd5a31f64225 github.com/milvus-io/milvus/pkg v0.0.2-0.20230823021022-7af0f7d90cee github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.8.3 - go.etcd.io/etcd/api/v3 v3.5.5 go.etcd.io/etcd/client/v3 v3.5.5 go.uber.org/zap v1.20.0 + sigs.k8s.io/yaml v1.2.0 ) require ( @@ -104,6 +103,7 @@ require ( github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/etcd/api/v3 v3.5.5 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/client/v2 v2.305.5 // indirect go.etcd.io/etcd/pkg/v3 v3.5.5 // indirect @@ -137,13 +137,12 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect ) replace ( github.com/apache/pulsar-client-go => github.com/milvus-io/pulsar-client-go v0.6.10 - github.com/milvus-io/milvus-sdk-go/v2 => github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230918025012-e4fb30541113 - github.com/milvus-io/milvus/pkg => github.com/SimFG/milvus/pkg v0.0.0-20230915085959-b1bd79e12920 + github.com/milvus-io/milvus-sdk-go/v2 => github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c + github.com/milvus-io/milvus/pkg => github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615 github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 github.com/tecbot/gorocksdb => ./../rocksdb ) diff --git a/core/go.sum b/core/go.sum index 35f0b9c4..377ed8eb 100644 --- a/core/go.sum +++ b/core/go.sum @@ -57,10 +57,10 @@ github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwS github.com/Joker/hpp v1.0.0/go.mod 
h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= -github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230918025012-e4fb30541113 h1:jnAHd3Na5AjPZxglIAD/Y3vl23LgMHtKTszxHtKyK2M= -github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230918025012-e4fb30541113/go.mod h1:L16rBZPU1/s98sDCPUn6WnDwBDPjJpRF2uNIvqe4c2g= -github.com/SimFG/milvus/pkg v0.0.0-20230915085959-b1bd79e12920 h1:swgxlJ4d3fn36dznNDCKU9XKwRtxQ0p7S84eTQFV8dI= -github.com/SimFG/milvus/pkg v0.0.0-20230915085959-b1bd79e12920/go.mod h1:OLjNOTL5QNP0iMgAUOKqP+qvYNulGno+F0O1HUVRCYU= +github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c h1:1S40miUo1RfuucrFheszLHW9j4nqR3aFFH0QUAifRAA= +github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c/go.mod h1:O1gKEbj3snNwETxglX0y3nIZNdQnWP57txQ1nphpM1o= +github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615 h1:dErrHuWnWHWxSbC58BnTwZW1wLNDi9WqwbMbINNQS2w= +github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615/go.mod h1:SGW0KrIpHihP4GmWVfiQDW3p/YsQijrv0aeu5tNvyE8= github.com/actgardner/gogen-avro/v10 v10.1.0/go.mod h1:o+ybmVjEa27AAr35FRqU98DJu1fXES56uXniYFv4yDA= github.com/actgardner/gogen-avro/v10 v10.2.1/go.mod h1:QUhjeHPchheYmMDni/Nx7VB0RsT/ee8YIgGY/xpEQgQ= github.com/actgardner/gogen-avro/v9 v9.1.0/go.mod h1:nyTj6wPqDJoxM3qdnjcLv+EnMDSDFqE0qDpva2QRmKc= @@ -220,8 +220,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0= github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -467,8 +465,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.3.1-0.20230911111453-720fcfb1a048 h1:rv9oEZ8gkrKhxb07E5utRrxZW70tK2xrmZBW9Md6Z2Y= -github.com/milvus-io/milvus-proto/go-api/v2 v2.3.1-0.20230911111453-720fcfb1a048/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab h1:XJtS30t72wLCnhI9EzaWCA8Hr5zROHj2/S+YYV4+hIs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/minio/highwayhash v1.0.2 
h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= diff --git a/core/main/with_ts.go b/core/main/with_ts.go index 0b162bb0..d95f0533 100644 --- a/core/main/with_ts.go +++ b/core/main/with_ts.go @@ -19,16 +19,18 @@ import ( "github.com/milvus-io/milvus-sdk-go/v2/client" "github.com/milvus-io/milvus-sdk-go/v2/entity" "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" "github.com/milvus-io/milvus/pkg/util/commonpbutil" "github.com/milvus-io/milvus/pkg/util/paramtable" - "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/pb" - "github.com/zilliztech/milvus-cdc/core/util" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" "sigs.k8s.io/yaml" + + "github.com/zilliztech/milvus-cdc/core/config" + "github.com/zilliztech/milvus-cdc/core/pb" + "github.com/zilliztech/milvus-cdc/core/util" ) // Before syncing, the cdc needs to check whether this channel has already been synced, to handle the case where a channel is reused; @@ -36,7 +38,7 @@ import ( // if a createCollectionMsg appears on a stream that is already being watched, it then needs to check whether this collection is already watched var ( - log = util.Log + // log = util.Log milvusCli client.Client etcdCli *clientv3.Client @@ -586,7 +588,7 @@ func SendMsgPack(pChannel string, vchannnels []string, collectionID int64, parti for _, position := range pack.EndPositions { position.ChannelName = pChannel } - err := milvusCli.ReplicateMessage(ctx, pChannel, + _, err := milvusCli.ReplicateMessage(ctx, pChannel, pack.BeginTs, pack.EndTs, msgBytesArr, pack.StartPositions, pack.EndPositions, @@ -693,7 +695,7 @@ func SendMsgPack2(sourceChannel, pChannel string, pack *msgstream.MsgPack) { for _, position := range pack.EndPositions { position.ChannelName = pChannel } - err := milvusCli.ReplicateMessage(ctx, pChannel, + _, err := milvusCli.ReplicateMessage(ctx, pChannel, pack.BeginTs, pack.EndTs, msgBytesArr, pack.StartPositions, pack.EndPositions, diff --git a/core/mocks/cdc_reader_mock.go b/core/mocks/cdc_reader_mock.go deleted file mode 100644 index e129b6c6..00000000 --- a/core/mocks/cdc_reader_mock.go +++ /dev/null @@ -1,58 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. - -package mocks - -import ( - context "context" - - mock "github.com/stretchr/testify/mock" - model "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/util" -) - -// CDCReader is an autogenerated mock type for the CDCReader type -type CDCReader struct { - util.CDCMark - mock.Mock -} - -// QuitRead provides a mock function with given fields: ctx -func (_m *CDCReader) QuitRead(ctx context.Context) { - _m.Called(ctx) -} - -// StartRead provides a mock function with given fields: ctx -func (_m *CDCReader) StartRead(ctx context.Context) <-chan *model.CDCData { - ret := _m.Called(ctx) - - var r0 <-chan *model.CDCData - if rf, ok := ret.Get(0).(func(context.Context) <-chan *model.CDCData); ok { - r0 = rf(ctx) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(<-chan *model.CDCData) - } - } - - return r0 -} - -// cdc provides a mock function with given fields: -func (_m *CDCReader) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewCDCReader interface { - mock.TestingT - Cleanup(func()) -} - -// NewCDCReader creates a new instance of CDCReader. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
-func NewCDCReader(t mockConstructorTestingTNewCDCReader) *CDCReader { - mock := &CDCReader{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/cdc_writer_mock.go b/core/mocks/cdc_writer_mock.go deleted file mode 100644 index ac7b1cdd..00000000 --- a/core/mocks/cdc_writer_mock.go +++ /dev/null @@ -1,57 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. - -package mocks - -import ( - context "context" - - mock "github.com/stretchr/testify/mock" - model "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/util" - writer "github.com/zilliztech/milvus-cdc/core/writer" -) - -// CDCWriter is an autogenerated mock type for the CDCWriter type -type CDCWriter struct { - util.CDCMark - mock.Mock -} - -// Flush provides a mock function with given fields: _a0 -func (_m *CDCWriter) Flush(_a0 context.Context) { - _m.Called(_a0) -} - -// Write provides a mock function with given fields: _a0, data, callback -func (_m *CDCWriter) Write(_a0 context.Context, data *model.CDCData, callback writer.WriteCallback) error { - ret := _m.Called(_a0, data, callback) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *model.CDCData, writer.WriteCallback) error); ok { - r0 = rf(_a0, data, callback) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// cdc provides a mock function with given fields: -func (_m *CDCWriter) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewCDCWriter interface { - mock.TestingT - Cleanup(func()) -} - -// NewCDCWriter creates a new instance of CDCWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewCDCWriter(t mockConstructorTestingTNewCDCWriter) *CDCWriter { - mock := &CDCWriter{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/factory_creator_mock.go b/core/mocks/factory_creator_mock.go deleted file mode 100644 index 5420cdbb..00000000 --- a/core/mocks/factory_creator_mock.go +++ /dev/null @@ -1,159 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. 
- -package mocks - -import ( - "github.com/milvus-io/milvus/pkg/mq/msgstream" - mock "github.com/stretchr/testify/mock" - config "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/util" -) - -// FactoryCreator is an autogenerated mock type for the FactoryCreator type -type FactoryCreator struct { - util.CDCMark - mock.Mock -} - -type FactoryCreator_Expecter struct { - mock *mock.Mock -} - -func (_m *FactoryCreator) EXPECT() *FactoryCreator_Expecter { - return &FactoryCreator_Expecter{mock: &_m.Mock} -} - -// NewKmsFactory provides a mock function with given fields: cfg -func (_m *FactoryCreator) NewKmsFactory(cfg *config.KafkaConfig) msgstream.Factory { - ret := _m.Called(cfg) - - var r0 msgstream.Factory - if rf, ok := ret.Get(0).(func(*config.KafkaConfig) msgstream.Factory); ok { - r0 = rf(cfg) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(msgstream.Factory) - } - } - - return r0 -} - -// FactoryCreator_NewKmsFactory_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'NewKmsFactory' -type FactoryCreator_NewKmsFactory_Call struct { - *mock.Call -} - -// NewKmsFactory is a helper method to define mock.On call -// - cfg *config.KafkaConfig -func (_e *FactoryCreator_Expecter) NewKmsFactory(cfg interface{}) *FactoryCreator_NewKmsFactory_Call { - return &FactoryCreator_NewKmsFactory_Call{Call: _e.mock.On("NewKmsFactory", cfg)} -} - -func (_c *FactoryCreator_NewKmsFactory_Call) Run(run func(cfg *config.KafkaConfig)) *FactoryCreator_NewKmsFactory_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*config.KafkaConfig)) - }) - return _c -} - -func (_c *FactoryCreator_NewKmsFactory_Call) Return(_a0 msgstream.Factory) *FactoryCreator_NewKmsFactory_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *FactoryCreator_NewKmsFactory_Call) RunAndReturn(run func(*config.KafkaConfig) msgstream.Factory) *FactoryCreator_NewKmsFactory_Call { - _c.Call.Return(run) - return _c -} - -// NewPmsFactory provides a mock function with given fields: cfg -func (_m *FactoryCreator) NewPmsFactory(cfg *config.PulsarConfig) msgstream.Factory { - ret := _m.Called(cfg) - - var r0 msgstream.Factory - if rf, ok := ret.Get(0).(func(*config.PulsarConfig) msgstream.Factory); ok { - r0 = rf(cfg) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(msgstream.Factory) - } - } - - return r0 -} - -// FactoryCreator_NewPmsFactory_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'NewPmsFactory' -type FactoryCreator_NewPmsFactory_Call struct { - *mock.Call -} - -// NewPmsFactory is a helper method to define mock.On call -// - cfg *config.PulsarConfig -func (_e *FactoryCreator_Expecter) NewPmsFactory(cfg interface{}) *FactoryCreator_NewPmsFactory_Call { - return &FactoryCreator_NewPmsFactory_Call{Call: _e.mock.On("NewPmsFactory", cfg)} -} - -func (_c *FactoryCreator_NewPmsFactory_Call) Run(run func(cfg *config.PulsarConfig)) *FactoryCreator_NewPmsFactory_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*config.PulsarConfig)) - }) - return _c -} - -func (_c *FactoryCreator_NewPmsFactory_Call) Return(_a0 msgstream.Factory) *FactoryCreator_NewPmsFactory_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *FactoryCreator_NewPmsFactory_Call) RunAndReturn(run func(*config.PulsarConfig) msgstream.Factory) *FactoryCreator_NewPmsFactory_Call { - _c.Call.Return(run) - return _c -} - -// cdc provides a mock function with given fields: -func (_m *FactoryCreator) cdc() { - _m.Called() -} - 
-// FactoryCreator_cdc_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'cdc' -type FactoryCreator_cdc_Call struct { - *mock.Call -} - -// cdc is a helper method to define mock.On call -func (_e *FactoryCreator_Expecter) cdc() *FactoryCreator_cdc_Call { - return &FactoryCreator_cdc_Call{Call: _e.mock.On("cdc")} -} - -func (_c *FactoryCreator_cdc_Call) Run(run func()) *FactoryCreator_cdc_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *FactoryCreator_cdc_Call) Return() *FactoryCreator_cdc_Call { - _c.Call.Return() - return _c -} - -func (_c *FactoryCreator_cdc_Call) RunAndReturn(run func()) *FactoryCreator_cdc_Call { - _c.Call.Return(run) - return _c -} - -type mockConstructorTestingTNewFactoryCreator interface { - mock.TestingT - Cleanup(func()) -} - -// NewFactoryCreator creates a new instance of FactoryCreator. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewFactoryCreator(t mockConstructorTestingTNewFactoryCreator) *FactoryCreator { - mock := &FactoryCreator{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/milvus_client_api_mock.go b/core/mocks/milvus_client_api_mock.go deleted file mode 100644 index ccde4c8c..00000000 --- a/core/mocks/milvus_client_api_mock.go +++ /dev/null @@ -1,248 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. - -package mocks - -import ( - context "context" - - client "github.com/milvus-io/milvus-sdk-go/v2/client" - entity "github.com/milvus-io/milvus-sdk-go/v2/entity" - mock "github.com/stretchr/testify/mock" - "github.com/zilliztech/milvus-cdc/core/util" -) - -// MilvusClientAPI is an autogenerated mock type for the MilvusClientAPI type -type MilvusClientAPI struct { - util.CDCMark - mock.Mock -} - -// CreateCollection provides a mock function with given fields: ctx, schema, shardsNum, opts -func (_m *MilvusClientAPI) CreateCollection(ctx context.Context, schema *entity.Schema, shardsNum int32, opts ...client.CreateCollectionOption) error { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, schema, shardsNum) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *entity.Schema, int32, ...client.CreateCollectionOption) error); ok { - r0 = rf(ctx, schema, shardsNum, opts...) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// CreateDatabase provides a mock function with given fields: ctx, dbName -func (_m *MilvusClientAPI) CreateDatabase(ctx context.Context, dbName string) error { - ret := _m.Called(ctx, dbName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string) error); ok { - r0 = rf(ctx, dbName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// CreateIndex provides a mock function with given fields: ctx, collName, fieldName, idx, async, opts -func (_m *MilvusClientAPI) CreateIndex(ctx context.Context, collName string, fieldName string, idx entity.Index, async bool, opts ...client.IndexOption) error { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, collName, fieldName, idx, async) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, entity.Index, bool, ...client.IndexOption) error); ok { - r0 = rf(ctx, collName, fieldName, idx, async, opts...) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// CreatePartition provides a mock function with given fields: ctx, collName, partitionName -func (_m *MilvusClientAPI) CreatePartition(ctx context.Context, collName string, partitionName string) error { - ret := _m.Called(ctx, collName, partitionName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { - r0 = rf(ctx, collName, partitionName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DeleteByPks provides a mock function with given fields: ctx, collName, partitionName, ids -func (_m *MilvusClientAPI) DeleteByPks(ctx context.Context, collName string, partitionName string, ids entity.Column) error { - ret := _m.Called(ctx, collName, partitionName, ids) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, entity.Column) error); ok { - r0 = rf(ctx, collName, partitionName, ids) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DropCollection provides a mock function with given fields: ctx, collName -func (_m *MilvusClientAPI) DropCollection(ctx context.Context, collName string) error { - ret := _m.Called(ctx, collName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string) error); ok { - r0 = rf(ctx, collName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DropDatabase provides a mock function with given fields: ctx, dbName -func (_m *MilvusClientAPI) DropDatabase(ctx context.Context, dbName string) error { - ret := _m.Called(ctx, dbName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string) error); ok { - r0 = rf(ctx, dbName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DropIndex provides a mock function with given fields: ctx, collName, fieldName, opts -func (_m *MilvusClientAPI) DropIndex(ctx context.Context, collName string, fieldName string, opts ...client.IndexOption) error { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, collName, fieldName) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, ...client.IndexOption) error); ok { - r0 = rf(ctx, collName, fieldName, opts...) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// DropPartition provides a mock function with given fields: ctx, collName, partitionName -func (_m *MilvusClientAPI) DropPartition(ctx context.Context, collName string, partitionName string) error { - ret := _m.Called(ctx, collName, partitionName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { - r0 = rf(ctx, collName, partitionName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Insert provides a mock function with given fields: ctx, collName, partitionName, columns -func (_m *MilvusClientAPI) Insert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) { - _va := make([]interface{}, len(columns)) - for _i := range columns { - _va[_i] = columns[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, collName, partitionName) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 entity.Column - var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, ...entity.Column) (entity.Column, error)); ok { - return rf(ctx, collName, partitionName, columns...) - } - if rf, ok := ret.Get(0).(func(context.Context, string, string, ...entity.Column) entity.Column); ok { - r0 = rf(ctx, collName, partitionName, columns...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(entity.Column) - } - } - - if rf, ok := ret.Get(1).(func(context.Context, string, string, ...entity.Column) error); ok { - r1 = rf(ctx, collName, partitionName, columns...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// LoadCollection provides a mock function with given fields: ctx, collName, async, opts -func (_m *MilvusClientAPI) LoadCollection(ctx context.Context, collName string, async bool, opts ...client.LoadCollectionOption) error { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, collName, async) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, bool, ...client.LoadCollectionOption) error); ok { - r0 = rf(ctx, collName, async, opts...) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// ReleaseCollection provides a mock function with given fields: ctx, collName -func (_m *MilvusClientAPI) ReleaseCollection(ctx context.Context, collName string) error { - ret := _m.Called(ctx, collName) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string) error); ok { - r0 = rf(ctx, collName) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -type mockConstructorTestingTNewMilvusClientAPI interface { - mock.TestingT - Cleanup(func()) -} - -// NewMilvusClientAPI creates a new instance of MilvusClientAPI. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewMilvusClientAPI(t mockConstructorTestingTNewMilvusClientAPI) *MilvusClientAPI { - mock := &MilvusClientAPI{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/milvus_client_factory_mock.go b/core/mocks/milvus_client_factory_mock.go deleted file mode 100644 index e896cbd2..00000000 --- a/core/mocks/milvus_client_factory_mock.go +++ /dev/null @@ -1,116 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. 
- -package mocks - -import ( - context "context" - - "github.com/zilliztech/milvus-cdc/core/util" - - mock "github.com/stretchr/testify/mock" - writer "github.com/zilliztech/milvus-cdc/core/writer" -) - -// MilvusClientFactory is an autogenerated mock type for the MilvusClientFactory type -type MilvusClientFactory struct { - util.CDCMark - mock.Mock -} - -// NewGrpcClient provides a mock function with given fields: ctx, addr -func (_m *MilvusClientFactory) NewGrpcClient(ctx context.Context, addr string) (writer.MilvusClientAPI, error) { - ret := _m.Called(ctx, addr) - - var r0 writer.MilvusClientAPI - var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string) (writer.MilvusClientAPI, error)); ok { - return rf(ctx, addr) - } - if rf, ok := ret.Get(0).(func(context.Context, string) writer.MilvusClientAPI); ok { - r0 = rf(ctx, addr) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(writer.MilvusClientAPI) - } - } - - if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { - r1 = rf(ctx, addr) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// NewGrpcClientWithAuth provides a mock function with given fields: ctx, addr, username, password -func (_m *MilvusClientFactory) NewGrpcClientWithAuth(ctx context.Context, addr string, username string, password string) (writer.MilvusClientAPI, error) { - ret := _m.Called(ctx, addr, username, password) - - var r0 writer.MilvusClientAPI - var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string) (writer.MilvusClientAPI, error)); ok { - return rf(ctx, addr, username, password) - } - if rf, ok := ret.Get(0).(func(context.Context, string, string, string) writer.MilvusClientAPI); ok { - r0 = rf(ctx, addr, username, password) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(writer.MilvusClientAPI) - } - } - - if rf, ok := ret.Get(1).(func(context.Context, string, string, string) error); ok { - r1 = rf(ctx, addr, username, password) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// NewGrpcClientWithTLSAuth provides a mock function with given fields: ctx, addr, username, password -func (_m *MilvusClientFactory) NewGrpcClientWithTLSAuth(ctx context.Context, addr string, username string, password string) (writer.MilvusClientAPI, error) { - ret := _m.Called(ctx, addr, username, password) - - var r0 writer.MilvusClientAPI - var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string) (writer.MilvusClientAPI, error)); ok { - return rf(ctx, addr, username, password) - } - if rf, ok := ret.Get(0).(func(context.Context, string, string, string) writer.MilvusClientAPI); ok { - r0 = rf(ctx, addr, username, password) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(writer.MilvusClientAPI) - } - } - - if rf, ok := ret.Get(1).(func(context.Context, string, string, string) error); ok { - r1 = rf(ctx, addr, username, password) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// cdc provides a mock function with given fields: -func (_m *MilvusClientFactory) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewMilvusClientFactory interface { - mock.TestingT - Cleanup(func()) -} - -// NewMilvusClientFactory creates a new instance of MilvusClientFactory. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func NewMilvusClientFactory(t mockConstructorTestingTNewMilvusClientFactory) *MilvusClientFactory { - mock := &MilvusClientFactory{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/monitor_mock.go b/core/mocks/monitor_mock.go deleted file mode 100644 index 780b82de..00000000 --- a/core/mocks/monitor_mock.go +++ /dev/null @@ -1,69 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. - -package mocks - -import ( - mock "github.com/stretchr/testify/mock" - "github.com/zilliztech/milvus-cdc/core/util" -) - -// Monitor is an autogenerated mock type for the Monitor type -type Monitor struct { - util.CDCMark - mock.Mock -} - -// OnFailGetCollectionInfo provides a mock function with given fields: collectionID, collectionName, err -func (_m *Monitor) OnFailGetCollectionInfo(collectionID int64, collectionName string, err error) { - _m.Called(collectionID, collectionName, err) -} - -// OnFailReadStream provides a mock function with given fields: collectionID, collectionName, vchannel, err -func (_m *Monitor) OnFailReadStream(collectionID int64, collectionName string, vchannel string, err error) { - _m.Called(collectionID, collectionName, vchannel, err) -} - -// OnFailUnKnowCollection provides a mock function with given fields: key, err -func (_m *Monitor) OnFailUnKnowCollection(key string, err error) { - _m.Called(key, err) -} - -// OnFilterReadMsg provides a mock function with given fields: msgType -func (_m *Monitor) OnFilterReadMsg(msgType string) { - _m.Called(msgType) -} - -// OnSuccessGetACollectionInfo provides a mock function with given fields: collectionID, collectionName -func (_m *Monitor) OnSuccessGetACollectionInfo(collectionID int64, collectionName string) { - _m.Called(collectionID, collectionName) -} - -// OnSuccessGetAllCollectionInfo provides a mock function with given fields: -func (_m *Monitor) OnSuccessGetAllCollectionInfo() { - _m.Called() -} - -// WatchChanClosed provides a mock function with given fields: -func (_m *Monitor) WatchChanClosed() { - _m.Called() -} - -// cdc provides a mock function with given fields: -func (_m *Monitor) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewMonitor interface { - mock.TestingT - Cleanup(func()) -} - -// NewMonitor creates a new instance of Monitor. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewMonitor(t mockConstructorTestingTNewMonitor) *Monitor { - mock := &Monitor{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/mocks/write_callbakc_mock.go b/core/mocks/write_callbakc_mock.go deleted file mode 100644 index d4340a59..00000000 --- a/core/mocks/write_callbakc_mock.go +++ /dev/null @@ -1,47 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. 
- -package mocks - -import ( - mock "github.com/stretchr/testify/mock" - model "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/util" - - writer "github.com/zilliztech/milvus-cdc/core/writer" -) - -// WriteCallback is an autogenerated mock type for the WriteCallback type -type WriteCallback struct { - util.CDCMark - mock.Mock -} - -// OnFail provides a mock function with given fields: data, err -func (_m *WriteCallback) OnFail(data *model.CDCData, err error) { - _m.Called(data, err) -} - -// OnSuccess provides a mock function with given fields: collectionID, channelInfos -func (_m *WriteCallback) OnSuccess(collectionID int64, channelInfos map[string]writer.CallbackChannelInfo) { - _m.Called(collectionID, channelInfos) -} - -// cdc provides a mock function with given fields: -func (_m *WriteCallback) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewWriteCallback interface { - mock.TestingT - Cleanup(func()) -} - -// NewWriteCallback creates a new instance of WriteCallback. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewWriteCallback(t mockConstructorTestingTNewWriteCallback) *WriteCallback { - mock := &WriteCallback{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/core/model/reader.go b/core/model/reader.go new file mode 100644 index 00000000..11263f52 --- /dev/null +++ b/core/model/reader.go @@ -0,0 +1,33 @@ +package model + +import ( + "github.com/milvus-io/milvus/pkg/mq/msgstream" +) + +type SourceCollectionInfo struct { + PChannelName string + CollectionID int64 + SeekPosition *msgstream.MsgPosition + ShardNum int +} + +type TargetCollectionInfo struct { + CollectionID int64 + PartitionInfo map[string]int64 + PChannel string + VChannel string + BarrierChan chan<- struct{} +} + +type HandlerOpts struct { + MessageBufferSize int + Factory msgstream.Factory +} + +type CollectionInfo struct { + CollectionID int64 + CollectionName string + VChannels []string + PChannels []string + Partitions map[string]int64 +} diff --git a/core/reader/channel_reader.go b/core/reader/channel_reader.go index 426013bb..d2d9e8d7 100644 --- a/core/reader/channel_reader.go +++ b/core/reader/channel_reader.go @@ -9,16 +9,19 @@ import ( "github.com/golang/protobuf/proto" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" "github.com/zilliztech/milvus-cdc/core/config" "github.com/zilliztech/milvus-cdc/core/model" "github.com/zilliztech/milvus-cdc/core/util" - "go.uber.org/zap" ) type ChannelReader struct { - DefaultReader + api.DefaultReader mqConfig config.MilvusMQConfig factoryCreator FactoryCreator diff --git a/core/reader/collection_reader.go b/core/reader/collection_reader.go new file mode 100644 index 00000000..556a572b --- /dev/null +++ b/core/reader/collection_reader.go @@ -0,0 +1,721 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reader + +import ( + "context" + "sync" + + "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/mq/msgstream" + + "github.com/zilliztech/milvus-cdc/core/api" + "github.com/zilliztech/milvus-cdc/core/pb" + "github.com/zilliztech/milvus-cdc/core/util" +) + +// var log = util.Log + +const ( + AllCollection = "*" +) + +type CollectionInfo struct { + collectionName string + positions map[string]*commonpb.KeyDataPair +} + +type ShouldReadFunc func(*pb.CollectionInfo) bool + +var _ api.Reader = (*CollectionReader)(nil) + +type CollectionReader struct { + api.DefaultReader + + // etcdConfig config.MilvusEtcdConfig + // mqConfig config.MilvusMQConfig + // collections []CollectionInfo + // monitor Monitor + // dataChanLen int + // + // etcdCli util.KVApi + // factoryCreator FactoryCreator + // shouldReadFunc ShouldReadFunc + // dataChan chan *model.CDCData + // cancelWatch context.CancelFunc + // collectionID2Name util.Map[int64, string] + // closeStreamFuncs util.SafeArray[func()] + // + // isQuit util.Value[bool] + // + // // Please no read or write it excluding in the beginning of readStreamData method + // readingSteamCollection []int64 + // readingLock sync.Mutex + // + // dbID int64 + + id string + channelManager api.ChannelManager + metaOp api.MetaOp + channelSeekPositions map[string]*msgpb.MsgPosition + replicateCollectionMap util.Map[int64, *pb.CollectionInfo] + replicateChannelMap util.Map[string, struct{}] + replicateChannelChan chan string + shouldReadFunc ShouldReadFunc + startOnce sync.Once + quitOnce sync.Once +} + +func NewCollectionReader(id string, channelManager api.ChannelManager, metaOp api.MetaOp, seekPosition map[string]*msgpb.MsgPosition, shouldReadFunc ShouldReadFunc) (api.Reader, error) { + reader := &CollectionReader{ + id: id, + channelManager: channelManager, + metaOp: metaOp, + channelSeekPositions: seekPosition, + shouldReadFunc: shouldReadFunc, + replicateChannelChan: make(chan string, 10), + } + return reader, nil +} + +// func NewCollectionReader(options ...config.Option[*CollectionReader]) (*CollectionReader, error) { +// reader := &CollectionReader{ +// monitor: NewDefaultMonitor(), +// factoryCreator: NewDefaultFactoryCreator(), +// dataChanLen: 10, +// dbID: 1, +// } +// reader.shouldReadFunc = reader.getDefaultShouldReadFunc() +// for _, option := range options { +// option.Apply(reader) +// } +// var err error +// reader.etcdCli, err = util.GetEtcdClient(reader.etcdConfig.Endpoints) +// if err != nil { +// log.Warn("fail to get etcd client", zap.Error(err)) +// return nil, err +// } +// reader.dataChan = make(chan *model.CDCData, reader.dataChanLen) +// reader.isQuit.Store(false) +// return reader, nil +// } + +func (reader *CollectionReader) StartRead(ctx context.Context) { + reader.startOnce.Do(func() { + reader.metaOp.SubscribeCollectionEvent(reader.id, func(info *pb.CollectionInfo) bool { + log.Info("has watched to read collection", zap.String("name", 
info.Schema.Name)) + if !reader.shouldReadFunc(info) { + return false + } + startPositions := make([]*msgpb.MsgPosition, 0) + for _, v := range info.StartPositions { + startPositions = append(startPositions, &msgstream.MsgPosition{ + ChannelName: v.GetKey(), + MsgID: v.GetData(), + }) + // _, loaded := reader.replicateChannelMap.LoadOrStore(v.GetKey(), struct{}{}) + // if !loaded { + // reader.replicateChannelChan <- v.GetKey() + // } + } + if err := reader.channelManager.StartReadCollection(ctx, info, startPositions); err != nil { + log.Warn("fail to start to replicate the collection data in the watch process", zap.Int64("id", info.ID), zap.Error(err)) + } + reader.replicateCollectionMap.Store(info.ID, info) + log.Info("has started to read collection", zap.String("name", info.Schema.Name)) + return true + }) + // TODO partition + // reader.metaOp.SubscribePartitionEvent(reader.id, func(info *pb.PartitionInfo) bool { + // collectionName := reader.metaOp.GetCollectionNameByID(ctx, info.CollectionId) + // if collectionName == "" { + // return false + // } + // tmpCollectionInfo := &pb.CollectionInfo{ + // ID: info.CollectionId, + // Schema: &schemapb.CollectionSchema{ + // Name: collectionName, + // }, + // } + // if !reader.shouldReadFunc(tmpCollectionInfo) { + // return false + // } + // + // // TODO handle event, create partition + // return true + // }) + reader.metaOp.WatchCollection(ctx, nil) + // TODO partition + // reader.metaOp.WatchPartition(ctx, nil) + + existedCollectionInfos, err := reader.metaOp.GetAllCollection(ctx, func(info *pb.CollectionInfo) bool { + return !reader.shouldReadFunc(info) + }) + if err != nil { + log.Warn("get all collection failed", zap.Error(err)) + } + seekPositions := lo.Values(reader.channelSeekPositions) + for _, info := range existedCollectionInfos { + // for _, name := range info.PhysicalChannelNames { + // _, loaded := reader.replicateChannelMap.LoadOrStore(name, struct{}{}) + // if !loaded { + // reader.replicateChannelChan <- name + // } + // } + log.Info("exist collection", zap.String("name", info.Schema.Name)) + if err := reader.channelManager.StartReadCollection(ctx, info, seekPositions); err != nil { + log.Warn("fail to start to replicate the collection data", zap.Int64("id", info.ID), zap.Error(err)) + } + reader.replicateCollectionMap.Store(info.ID, info) + } + }) +} + +func (reader *CollectionReader) QuitRead(ctx context.Context) { + reader.quitOnce.Do(func() { + reader.replicateCollectionMap.Range(func(_ int64, value *pb.CollectionInfo) bool { + err := reader.channelManager.StopReadCollection(ctx, value) + if err != nil { + log.Warn("fail to stop read collection", zap.Error(err)) + } + return true + }) + reader.metaOp.UnsubscribeEvent(reader.id, api.CollectionEventType) + close(reader.replicateChannelChan) + }) +} + +func (reader *CollectionReader) GetChannelChan() <-chan string { + return reader.replicateChannelChan +} + +// func (reader *CollectionReader) getDefaultShouldReadFunc() ShouldReadFunc { +// return func(i *pb.CollectionInfo) bool { +// return lo.ContainsBy(reader.collections, func(info CollectionInfo) bool { +// return i.Schema.Name == info.collectionName +// }) +// } +// } + +// func (reader *CollectionReader) watchCollection(watchCtx context.Context) { +// // watch collection prefix to avoid new collection while getting the all collection +// // TODO improvement watch single instance +// watchChan := reader.etcdCli.Watch(watchCtx, reader.collectionPrefix()+"/", clientv3.WithPrefix()) +// for { +// select { +// case watchResp, 
ok := <-watchChan: +// if !ok { +// reader.monitor.WatchChanClosed() +// return +// } +// lo.ForEach(watchResp.Events, func(event *clientv3.Event, _ int) { +// if event.Type != clientv3.EventTypePut { +// return +// } +// collectionKey := util.ToString(event.Kv.Key) +// log.Info("collection key", zap.String("key", collectionKey)) +// if !strings.HasPrefix(collectionKey, reader.collectionPrefix()) { +// return +// } +// info := &pb.CollectionInfo{} +// err := proto.Unmarshal(event.Kv.Value, info) +// if err != nil { +// log.Warn("fail to unmarshal the collection info", zap.String("key", collectionKey), zap.String("value", util.Base64Encode(event.Kv.Value)), zap.Error(err)) +// reader.monitor.OnFailUnKnowCollection(collectionKey, err) +// return +// } +// if info.State == pb.CollectionState_CollectionCreated { +// go func() { +// log.Info("collection key created", zap.String("key", collectionKey)) +// if reader.shouldReadFunc(info) { +// log.Info("collection key should created", zap.String("key", collectionKey)) +// reader.collectionID2Name.Store(info.ID, reader.collectionName(info)) +// err := util.Do(context.Background(), func() error { +// err := reader.fillCollectionField(info) +// if err != nil { +// log.Info("fail to get collection fields, retry...", zap.String("key", collectionKey), zap.Error(err)) +// } +// return err +// }) +// if err != nil { +// log.Warn("fail to get collection fields", zap.String("key", collectionKey), zap.Error(err)) +// reader.monitor.OnFailGetCollectionInfo(info.ID, reader.collectionName(info), err) +// return +// } +// reader.readStreamData(info, true) +// } +// }() +// } +// }) +// case <-watchCtx.Done(): +// log.Info("watch collection context done") +// return +// } +// } +// } +// +// func (reader *CollectionReader) watchPartition(watchCtx context.Context) { +// watchChan := reader.etcdCli.Watch(watchCtx, reader.partitionPrefix()+"/", clientv3.WithPrefix()) +// for { +// select { +// case watchResp, ok := <-watchChan: +// if !ok { +// return +// } +// lo.ForEach(watchResp.Events, func(event *clientv3.Event, _ int) { +// if event.Type != clientv3.EventTypePut { +// return +// } +// partitionKey := util.ToString(event.Kv.Key) +// if !strings.HasPrefix(partitionKey, reader.partitionPrefix()) { +// return +// } +// id := reader.getCollectionIDFromPartitionKey(partitionKey) +// if id == 0 { +// log.Warn("fail to get the collection id", zap.String("key", partitionKey)) +// return +// } +// info := &pb.PartitionInfo{} +// err := proto.Unmarshal(event.Kv.Value, info) +// if err != nil { +// log.Warn("fail to unmarshal the partition info", zap.String("key", partitionKey), zap.String("value", util.Base64Encode(event.Kv.Value)), zap.Error(err)) +// // TODO monitor +// // reader.monitor.OnFailUnKnowCollection(collectionKey, err) +// return +// } +// if info.State == pb.PartitionState_PartitionCreated && +// info.PartitionName != reader.etcdConfig.DefaultPartitionName { +// collectionName, ok := reader.collectionID2Name.Load(id) +// if !ok { +// collectionName = reader.getCollectionNameByID(id) +// if collectionName == "" { +// log.Warn("not found the collection", zap.Int64("collection_id", id), +// zap.Int64("partition_id", info.PartitionID), +// zap.String("partition_name", info.PartitionName)) +// return +// } +// } +// data := &model.CDCData{ +// Msg: &msgstream.CreatePartitionMsg{ +// BaseMsg: msgstream.BaseMsg{}, +// CreatePartitionRequest: msgpb.CreatePartitionRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_CreatePartition, +// }, +// 
CollectionName: collectionName, +// PartitionName: info.PartitionName, +// CollectionID: info.CollectionId, +// PartitionID: info.PartitionID, +// }, +// }, +// } +// reader.sendData(data) +// } +// }) +// case <-watchCtx.Done(): +// log.Info("watch partition context done") +// return +// } +// } +// } +// +// func (reader *CollectionReader) getCollectionNameByID(collectionID int64) string { +// var ( +// resp *clientv3.GetResponse +// err error +// ) +// +// if reader.dbID == 0 { +// resp, err = util.EtcdGet(reader.etcdCli, path.Join(reader.collectionPrefix(), strconv.FormatInt(collectionID, 10))) +// } else { +// resp, err = util.EtcdGet(reader.etcdCli, path.Join(reader.collectionPrefix(), strconv.FormatInt(reader.dbID, 10), strconv.FormatInt(collectionID, 10))) +// } +// if err != nil { +// log.Warn("fail to get all collection data", zap.Int64("collection_id", collectionID), zap.Error(err)) +// return "" +// } +// if len(resp.Kvs) == 0 { +// log.Warn("the collection isn't existed", zap.Int64("collection_id", collectionID)) +// return "" +// } +// info := &pb.CollectionInfo{} +// err = proto.Unmarshal(resp.Kvs[0].Value, info) +// if err != nil { +// log.Warn("fail to unmarshal collection info, maybe it's a deleted collection", +// zap.Int64("collection_id", collectionID), +// zap.String("value", util.Base64Encode(resp.Kvs[0].Value)), +// zap.Error(err)) +// return "" +// } +// collectionName := reader.collectionName(info) +// if reader.shouldReadFunc(info) { +// reader.collectionID2Name.Store(collectionID, collectionName) +// return collectionName +// } +// log.Warn("the collection can't be read", zap.Int64("id", collectionID), zap.String("name", collectionName)) +// return "" +// } +// +// func (reader *CollectionReader) getAllCollections() { +// var ( +// existedCollectionInfos []*pb.CollectionInfo +// err error +// ) +// +// existedCollectionInfos, err = reader.getCollectionInfo() +// if err != nil { +// log.Warn("fail to get collection", zap.Error(err)) +// reader.monitor.OnFailUnKnowCollection(reader.collectionPrefix(), err) +// } +// for _, info := range existedCollectionInfos { +// if info.State == pb.CollectionState_CollectionCreated { +// go reader.readStreamData(info, false) +// } +// } +// } +// +// func (reader *CollectionReader) collectionPrefix() string { +// c := reader.etcdConfig +// collectionKey := c.CollectionKey +// if reader.dbID != 0 { +// collectionKey = c.CollectionWithDBKey +// } +// return util.GetCollectionPrefix(c.RootPath, c.MetaSubPath, collectionKey) +// } +// +// func (reader *CollectionReader) partitionPrefix() string { +// c := reader.etcdConfig +// return util.GetPartitionPrefix(c.RootPath, c.MetaSubPath, c.PartitionKey) +// } +// +// func (reader *CollectionReader) fieldPrefix() string { +// c := reader.etcdConfig +// return util.GetFieldPrefix(c.RootPath, c.MetaSubPath, c.FiledKey) +// } +// +// func (reader *CollectionReader) collectionName(info *pb.CollectionInfo) string { +// return info.Schema.Name +// } +// +// func (reader *CollectionReader) getCollectionIDFromPartitionKey(key string) int64 { +// subStrs := strings.Split(key[len(reader.partitionPrefix())+1:], "/") +// if len(subStrs) != 2 { +// log.Warn("the key is invalid", zap.String("key", key), zap.Strings("sub", subStrs)) +// return 0 +// } +// id, err := strconv.ParseInt(subStrs[0], 10, 64) +// if err != nil { +// log.Warn("fail to parse the collection id", zap.String("id", subStrs[0]), zap.Error(err)) +// return 0 +// } +// return id +// } +// +// // getCollectionInfo The return value 
meanings are respectively: +// // 1. collection infos that the collection have existed +// // 2. error message +// func (reader *CollectionReader) getCollectionInfo() ([]*pb.CollectionInfo, error) { +// resp, err := util.EtcdGet(reader.etcdCli, reader.collectionPrefix()+"/", clientv3.WithPrefix()) +// if err != nil { +// log.Warn("fail to get all collection data", zap.Error(err)) +// return nil, err +// } +// var existedCollectionInfos []*pb.CollectionInfo +// +// for _, kv := range resp.Kvs { +// info := &pb.CollectionInfo{} +// err = proto.Unmarshal(kv.Value, info) +// if err != nil { +// log.Warn("fail to unmarshal collection info, maybe it's a deleted collection", zap.String("key", util.ToString(kv.Key)), zap.String("value", util.Base64Encode(kv.Value)), zap.Error(err)) +// continue +// } +// if reader.shouldReadFunc(info) { +// reader.collectionID2Name.Store(info.ID, reader.collectionName(info)) +// log.Info("get the collection that it need to be replicated", zap.String("name", reader.collectionName(info)), zap.String("key", util.ToString(kv.Key))) +// err = reader.fillCollectionField(info) +// if err != nil { +// return existedCollectionInfos, err +// } +// existedCollectionInfos = append(existedCollectionInfos, info) +// } +// } +// return existedCollectionInfos, nil +// } +// +// func (reader *CollectionReader) fillCollectionField(info *pb.CollectionInfo) error { +// filedPrefix := reader.fieldPrefix() +// prefix := path.Join(filedPrefix, strconv.FormatInt(info.ID, 10)) + "/" +// resp, err := util.EtcdGet(reader.etcdCli, prefix, clientv3.WithPrefix()) +// if err != nil { +// log.Warn("fail to get the collection field data", zap.String("prefix", prefix), zap.Error(err)) +// return err +// } +// if len(resp.Kvs) == 0 { +// err = errors.New("not found the collection field data") +// log.Warn(err.Error(), zap.String("prefix", filedPrefix)) +// return err +// } +// var fields []*schemapb.FieldSchema +// for _, kv := range resp.Kvs { +// field := &schemapb.FieldSchema{} +// err = proto.Unmarshal(kv.Value, field) +// if err != nil { +// log.Warn("fail to unmarshal filed schema info", +// zap.String("key", util.ToString(kv.Key)), zap.String("value", util.Base64Encode(kv.Value)), zap.Error(err)) +// return err +// } +// if field.Name == common.MetaFieldName { +// info.Schema.EnableDynamicField = true +// continue +// } +// if field.FieldID >= 100 { +// fields = append(fields, field) +// } +// } +// info.Schema.Fields = fields +// return nil +// } +// +// func (reader *CollectionReader) readStreamData(info *pb.CollectionInfo, sendCreateMsg bool) { +// isRepeatCollection := func(id int64) bool { +// reader.readingLock.Lock() +// defer reader.readingLock.Unlock() +// +// if lo.Contains(reader.readingSteamCollection, id) { +// return true +// } +// reader.readingSteamCollection = append(reader.readingSteamCollection, id) +// return false +// } +// if isRepeatCollection(info.ID) { +// return +// } +// reader.monitor.OnSuccessGetACollectionInfo(info.ID, reader.collectionName(info)) +// +// if sendCreateMsg { +// schemaByte, err := json.Marshal(info.Schema) +// if err != nil { +// log.Warn("fail to marshal the collection schema", zap.Error(err)) +// reader.monitor.OnFailReadStream(info.ID, reader.collectionName(info), "unknown", err) +// return +// } +// createCollectionMsg := &msgstream.CreateCollectionMsg{ +// BaseMsg: msgstream.BaseMsg{ +// HashValues: []uint32{0}, +// }, +// CreateCollectionRequest: msgpb.CreateCollectionRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: 
commonpb.MsgType_CreateCollection, +// }, +// CollectionName: reader.collectionName(info), +// CollectionID: info.ID, +// Schema: schemaByte, +// }, +// } +// reader.sendData(&model.CDCData{ +// Msg: createCollectionMsg, +// Extra: map[string]any{ +// model.ShardNumKey: info.ShardsNum, +// model.ConsistencyLevelKey: info.ConsistencyLevel, +// model.CollectionPropertiesKey: info.Properties, +// }, +// }) +// } +// +// vchannels := info.VirtualChannelNames +// barrierManager := NewDataBarrierManager(len(vchannels), reader.sendData) +// log.Info("read vchannels", zap.Strings("channels", vchannels)) +// for _, vchannel := range vchannels { +// position, err := reader.collectionPosition(info, vchannel) +// handleError := func() { +// reader.monitor.OnFailReadStream(info.ID, reader.collectionName(info), vchannel, err) +// reader.isQuit.Store(true) +// } +// if err != nil { +// log.Warn("fail to find the collection position", zap.String("vchannel", vchannel), zap.Error(err)) +// handleError() +// return +// } +// stream, err := reader.msgStream() +// if err != nil { +// log.Warn("fail to new message stream", zap.String("vchannel", vchannel), zap.Error(err)) +// handleError() +// return +// } +// msgChan, err := reader.msgStreamChan(vchannel, position, stream) +// if err != nil { +// stream.Close() +// log.Warn("fail to get message stream chan", zap.String("vchannel", vchannel), zap.Error(err)) +// handleError() +// return +// } +// reader.closeStreamFuncs.Append(stream.Close) +// go reader.readMsg(reader.collectionName(info), info.ID, vchannel, msgChan, barrierManager) +// } +// } +// +// func (reader *CollectionReader) collectionPosition(info *pb.CollectionInfo, vchannelName string) (*msgstream.MsgPosition, error) { +// pchannel := util.ToPhysicalChannel(vchannelName) +// for _, collection := range reader.collections { +// if collection.collectionName == reader.collectionName(info) && +// collection.positions != nil { +// if pair, ok := collection.positions[pchannel]; ok { +// return &msgstream.MsgPosition{ +// ChannelName: vchannelName, +// MsgID: pair.GetData(), +// }, nil +// } +// } +// } +// // return util.GetChannelStartPosition(vchannelName, info.StartPositions) +// return nil, nil +// } +// +// func (reader *CollectionReader) msgStream() (msgstream.MsgStream, error) { +// var factory msgstream.Factory +// if reader.mqConfig.Pulsar.Address != "" { +// factory = reader.factoryCreator.NewPmsFactory(&reader.mqConfig.Pulsar) +// } else if reader.mqConfig.Kafka.Address != "" { +// factory = reader.factoryCreator.NewKmsFactory(&reader.mqConfig.Kafka) +// } else { +// return nil, errors.New("fail to get the msg stream, check the mqConfig param") +// } +// stream, err := factory.NewMsgStream(context.Background()) +// if err != nil { +// log.Warn("fail to new the msg stream", zap.Error(err)) +// } +// return stream, err +// } +// +// func (reader *CollectionReader) msgStreamChan(vchannel string, position *msgstream.MsgPosition, stream msgstream.MsgStream) (<-chan *msgstream.MsgPack, error) { +// consumeSubName := vchannel + string(rand.Int31()) +// pchannelName := util.ToPhysicalChannel(vchannel) +// stream.AsConsumer(context.Background(), []string{pchannelName}, consumeSubName, mqwrapper.SubscriptionPositionLatest) +// if position == nil { +// return stream.Chan(), nil +// } +// position.ChannelName = pchannelName +// err := stream.Seek(context.Background(), []*msgstream.MsgPosition{position}) +// if err != nil { +// stream.Close() +// log.Warn("fail to seek the msg position", 
zap.String("vchannel", vchannel), zap.Error(err)) +// return nil, err +// } +// +// return stream.Chan(), nil +// } +// +// func (reader *CollectionReader) readMsg(collectionName string, collectionID int64, vchannelName string, +// c <-chan *msgstream.MsgPack, +// barrierManager *DataBarrierManager, +// ) { +// for { +// if reader.isQuit.Load() && barrierManager.IsEmpty() { +// return +// } +// msgPack := <-c +// if msgPack == nil { +// return +// } +// for _, msg := range msgPack.Msgs { +// msgType := msg.Type() +// if reader.filterMsgType(msgType) { +// continue +// } +// log.Info("msgType", zap.Any("msg_type", msgType)) +// if reader.filterMsg(collectionName, collectionID, msg) { +// continue +// } +// data := &model.CDCData{ +// Msg: msg, +// } +// if barrierManager.IsBarrierData(data) { +// if dropPartitionMsg, ok := msg.(*msgstream.DropPartitionMsg); ok { +// dropPartitionMsg.CollectionName = collectionName +// } +// barrierManager.AddData(vchannelName, data) +// if _, ok := msg.(*msgstream.DropCollectionMsg); ok { +// return +// } +// continue +// } +// reader.sendData(&model.CDCData{ +// Msg: msg, +// Extra: map[string]any{ +// model.CollectionIDKey: collectionID, +// model.CollectionNameKey: collectionName, +// }, +// }) +// } +// } +// } +// +// func (reader *CollectionReader) filterMsgType(msgType commonpb.MsgType) bool { +// return msgType == commonpb.MsgType_TimeTick +// } +// +// func (reader *CollectionReader) filterMsg(collectionName string, collectionID int64, msg msgstream.TsMsg) bool { +// if x, ok := msg.(interface{ GetCollectionName() string }); ok { +// notEqual := x.GetCollectionName() != collectionName +// if notEqual { +// log.Warn("filter msg", +// zap.String("current_collection_name", collectionName), +// zap.String("msg_collection_name", x.GetCollectionName()), +// zap.Any("msg_type", msg.Type())) +// reader.monitor.OnFilterReadMsg(msg.Type().String()) +// } +// return notEqual +// } +// if y, ok := msg.(interface{ GetCollectionID() int64 }); ok { +// notEqual := y.GetCollectionID() != collectionID +// if notEqual { +// log.Warn("filter msg", +// zap.Int64("current_collection_id", collectionID), +// zap.Int64("msg_collection_name", y.GetCollectionID()), +// zap.Any("msg_type", msg.Type())) +// reader.monitor.OnFilterReadMsg(msg.Type().String()) +// } +// return notEqual +// } +// return true +// } +// +// func (reader *CollectionReader) CancelWatchCollection() { +// if reader.cancelWatch != nil { +// reader.cancelWatch() +// } +// } + +// func (reader *CollectionReader) QuitRead(ctx context.Context) { +// reader.quitOnce.Do(func() { +// reader.isQuit.Store(true) +// reader.CancelWatchCollection() +// reader.closeStreamFuncs.Range(func(_ int, value func()) bool { +// value() +// return true +// }) +// }) +// } + +// func (reader *CollectionReader) sendData(data *model.CDCData) { +// reader.dataChan <- data +// } diff --git a/core/reader/collection_reader_test.go b/core/reader/collection_reader_test.go new file mode 100644 index 00000000..f9219df1 --- /dev/null +++ b/core/reader/collection_reader_test.go @@ -0,0 +1,589 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package reader_test + +// import ( +// "context" +// "fmt" +// "strconv" +// "testing" +// "time" +// +// "github.com/cockroachdb/errors" +// "github.com/goccy/go-json" +// "github.com/golang/protobuf/proto" +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/mock" +// "go.etcd.io/etcd/api/v3/mvccpb" +// clientv3 "go.etcd.io/etcd/client/v3" +// +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +// "github.com/milvus-io/milvus/pkg/mq/msgstream" +// +// "github.com/zilliztech/milvus-cdc/core/config" +// "github.com/zilliztech/milvus-cdc/core/mocks" +// "github.com/zilliztech/milvus-cdc/core/model" +// "github.com/zilliztech/milvus-cdc/core/pb" +// "github.com/zilliztech/milvus-cdc/core/reader" +// "github.com/zilliztech/milvus-cdc/core/util" +// ) +// +// var ( +// endpoints = []string{"localhost:2379"} +// rootPath = "dev" +// metaSubPath = "meta" +// collectionKey = "root-coord/collection" +// filedKey = "root-coord/fields" +// collectionPrefix = util.GetCollectionPrefix(rootPath, metaSubPath, collectionKey) +// fieldPrefix = util.GetFieldPrefix(rootPath, metaSubPath, filedKey) +// etcdConfig = config.NewMilvusEtcdConfig(config.MilvusEtcdEndpointsOption(endpoints), +// config.MilvusEtcdRootPathOption(rootPath), +// config.MilvusEtcdMetaSubPathOption(metaSubPath)) +// pulsarConfig = config.NewPulsarConfig( +// config.PulsarAddressOption(fmt.Sprintf("pulsar://%s:%d", "localhost", 6650)), +// config.PulsarWebAddressOption("", 80), +// config.PulsarMaxMessageSizeOption(5242880), +// config.PulsarTenantOption("public", "default"), +// ) +// kafkaConfig = config.NewKafkaConfig(config.KafkaAddressOption("localhost:9092")) +// ) +// +// func TestNewMilvusCollectionReader(t *testing.T) { +// monitor := mocks.NewMonitor(t) +// var options []config.Option[*reader.CollectionReader] +// mockEtcdCli := mocks.NewKVApi(t) +// mockEtcdCli.On("Endpoints").Return(endpoints) +// +// util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { +// return mockEtcdCli, nil +// }, func() { +// call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(nil, errors.New("status error")) +// defer call.Unset() +// _, err := reader.NewCollectionReader(append(options, +// reader.EtcdOption(etcdConfig), +// reader.MqOption(config.PulsarConfig{}, config.KafkaConfig{Address: "address"}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) +// assert.Error(t, err) +// }) +// +// util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { +// return mockEtcdCli, nil +// }, func() { +// call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) +// defer call.Unset() +// _, err := reader.NewCollectionReader(append(options, +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) 
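Note: all of the option helpers exercised in these (now commented-out) tests — EtcdOption, MqOption, MonitorOption, ChanLenOption and friends — follow the generic functional-option shape from the config package. A minimal, self-contained sketch of that shape; the Option/OptionFunc pair mirrors how config.Option is used in this patch, while the readerCfg struct, its chanLen field, and WithChanLen are purely illustrative:

package main

import "fmt"

// Option applies a single configuration change to an object of type T.
type Option[T any] interface{ Apply(T) }

// OptionFunc adapts a plain function into an Option.
type OptionFunc[T any] func(T)

func (f OptionFunc[T]) Apply(t T) { f(t) }

// readerCfg is an illustrative target; the real readers carry many more fields.
type readerCfg struct{ chanLen int }

// WithChanLen ignores non-positive values, like ChanLenOption does.
func WithChanLen(l int) Option[*readerCfg] {
	return OptionFunc[*readerCfg](func(c *readerCfg) {
		if l > 0 {
			c.chanLen = l
		}
	})
}

func main() {
	cfg := &readerCfg{chanLen: 10}
	for _, opt := range []Option[*readerCfg]{WithChanLen(20), WithChanLen(0)} {
		opt.Apply(cfg)
	}
	fmt.Println(cfg.chanLen) // 20: the non-positive value was ignored
}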
+// assert.NoError(t, err) +// }) +// } +// +// func TestReaderGetCollectionInfo(t *testing.T) { +// mockEtcdCli := mocks.NewKVApi(t) +// mockEtcdCli.On("Endpoints").Return(endpoints) +// +// util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { +// return mockEtcdCli, nil +// }, func() { +// call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) +// defer call.Unset() +// collectionName1 := "coll1" +// collectionID1 := int64(100) +// collectionName2 := "coll2" +// collectionID2 := int64(200) +// +// factoryCreator := mocks.NewFactoryCreator(t) +// monitor := mocks.NewMonitor(t) +// +// util.EtcdOpRetryTime = 1 +// defer func() { +// util.EtcdOpRetryTime = 5 +// }() +// +// t.Run("get error", func(t *testing.T) { +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, nil)) +// options = append(options, reader.CollectionInfoOption(collectionName2, nil)) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) +// assert.NoError(t, err) +// getCall := mockEtcdCli.On("Get", mock.Anything, collectionPrefix+"/", mock.Anything).Return(nil, errors.New("get error")) +// watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) +// unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() +// defer func() { +// watchCall.Unset() +// getCall.Unset() +// unknownCall.Unset() +// }() +// collectionReader.StartRead(context.Background()) +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// monitor.AssertCalled(t, "OnFailUnKnowCollection", collectionPrefix, mock.Anything) +// }) +// +// info1 := &pb.CollectionInfo{ +// ID: collectionID1, +// Schema: &schemapb.CollectionSchema{ +// Name: collectionName1, +// }, +// } +// byte1, _ := proto.Marshal(info1) +// info2 := &pb.CollectionInfo{ +// ID: collectionID2, +// Schema: &schemapb.CollectionSchema{ +// Name: collectionName2, +// }, +// } +// byte2, _ := proto.Marshal(info2) +// field := &schemapb.FieldSchema{ +// FieldID: 101, +// } +// fieldByte3, _ := proto.Marshal(field) +// +// t.Run("field error", func(t *testing.T) { +// getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { +// if s == collectionPrefix+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: byte2, +// }, +// { +// Value: byte1, +// }, +// }, +// }, nil +// } +// return nil, errors.New("get error") +// }) +// watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) +// unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() +// defer func() { +// getCall.Unset() +// watchCall.Unset() +// unknownCall.Unset() +// }() +// +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, nil)) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// 
reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) +// assert.NoError(t, err) +// collectionReader.StartRead(context.Background()) +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// }) +// +// t.Run("no field error", func(t *testing.T) { +// getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { +// if s == collectionPrefix+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: byte2, +// }, +// { +// Value: byte1, +// }, +// }, +// }, nil +// } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{}, +// }, nil +// } +// return nil, errors.New("get error") +// }) +// watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) +// unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() +// defer func() { +// getCall.Unset() +// watchCall.Unset() +// unknownCall.Unset() +// }() +// +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, nil)) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) +// assert.NoError(t, err) +// collectionReader.StartRead(context.Background()) +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// }) +// +// t.Run("success", func(t *testing.T) { +// getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { +// if s == collectionPrefix+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: byte2, +// }, +// { +// Value: byte1, +// }, +// }, +// }, nil +// } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: fieldByte3, +// }, +// }, +// }, nil +// } +// return nil, errors.New("get error") +// }) +// watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) +// asuccessCall := monitor.On("OnSuccessGetACollectionInfo", collectionID1, collectionName1).Return() +// successCall := monitor.On("OnSuccessGetAllCollectionInfo").Return() +// defer func() { +// getCall.Unset() +// watchCall.Unset() +// asuccessCall.Unset() +// successCall.Unset() +// }() +// +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, nil)) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) 
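The mocked Get responses in these subtests drive the same decode-and-filter steps that the reader performs on every collection value it pulls from etcd. A hedged sketch of those steps, using the repo's pb package and the protobuf import already present in this patch; the helper name and the stdlib errors usage are illustrative:

import (
	"errors"

	"github.com/golang/protobuf/proto"

	"github.com/zilliztech/milvus-cdc/core/pb"
)

// decodeCollection unmarshals one etcd value and decides whether the reader
// should replicate it: only created collections pass, and a non-nil filter can
// veto the rest. Returning an error stands in for the log-and-skip behaviour.
func decodeCollection(value []byte, filter func(*pb.CollectionInfo) bool) (*pb.CollectionInfo, error) {
	info := &pb.CollectionInfo{}
	if err := proto.Unmarshal(value, info); err != nil {
		return nil, err // likely a deleted or corrupted key
	}
	if info.State != pb.CollectionState_CollectionCreated {
		return nil, errors.New("collection is not in the created state")
	}
	if filter != nil && filter(info) {
		return nil, errors.New("collection filtered out by the task")
	}
	return info, nil
}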
+// assert.NoError(t, err) +// collectionReader.StartRead(context.Background()) +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// }) +// }) +// } +// +// func TestReaderWatchCollectionInfo(t *testing.T) { +// mockEtcdCli := mocks.NewKVApi(t) +// mockEtcdCli.On("Endpoints").Return(endpoints) +// call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) +// defer call.Unset() +// collectionName1 := "coll1" +// collectionID1 := int64(100) +// factoryCreator := mocks.NewFactoryCreator(t) +// field := &schemapb.FieldSchema{ +// FieldID: 101, +// } +// fieldByte3, _ := proto.Marshal(field) +// +// util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { +// return mockEtcdCli, nil +// }, func() { +// monitor := mocks.NewMonitor(t) +// +// t.Run("watch chan close", func(t *testing.T) { +// getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { +// if s == collectionPrefix+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{}, +// }, nil +// } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: fieldByte3, +// }, +// }, +// }, nil +// } +// return nil, errors.New("get error") +// }) +// watchChan := make(chan clientv3.WatchResponse) +// var onlyReadChan clientv3.WatchChan = watchChan +// watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan) +// closeCall := monitor.On("WatchChanClosed").Return() +// +// defer func() { +// getCall.Unset() +// watchCall.Unset() +// closeCall.Unset() +// }() +// close(watchChan) +// +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, nil)) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) 
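The "watch chan close" case above relies on the reader noticing that etcd closed its watch channel. The legacy watchCollection and the new EtcdOp.WatchCollection share the same loop shape for that; a minimal sketch under the assumption of an illustrative handle callback (the log text is not the real message):

import (
	"context"
	"log"

	clientv3 "go.etcd.io/etcd/client/v3"
)

// watchLoop consumes an etcd watch channel until it is closed or the context
// is cancelled, forwarding only PUT events to the handler.
func watchLoop(ctx context.Context, ch clientv3.WatchChan, handle func(*clientv3.Event)) {
	for {
		select {
		case resp, ok := <-ch:
			if !ok {
				log.Println("watch channel closed") // the real code reports this via the monitor/logger
				return
			}
			for _, event := range resp.Events {
				if event.Type != clientv3.EventTypePut {
					continue
				}
				handle(event)
			}
		case <-ctx.Done():
			return
		}
	}
}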
+// assert.NoError(t, err) +// collectionReader.StartRead(context.Background()) +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// }) +// }) +// +// util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { +// return mockEtcdCli, nil +// }, func() { +// monitor := mocks.NewMonitor(t) +// t.Run("send msg", func(t *testing.T) { +// msgStream1 := mocks.NewMsgStream(t) +// msgStream2 := mocks.NewMsgStream(t) +// factory := mocks.NewFactory(t) +// pmsCall := factoryCreator.EXPECT().NewPmsFactory(mock.Anything).RunAndReturn(func(c *config.PulsarConfig) msgstream.Factory { +// return factory +// }) +// defer pmsCall.Unset() +// i := 0 +// factory.EXPECT().NewMsgStream(mock.Anything).RunAndReturn(func(ctx context.Context) (msgstream.MsgStream, error) { +// if i == 0 { +// i++ +// return msgStream1, nil +// } +// return msgStream2, nil +// }) +// +// shardNum := int32(4) +// level := commonpb.ConsistencyLevel_Session +// kv := &commonpb.KeyValuePair{Key: "foo", Value: "123"} +// info1 := &pb.CollectionInfo{ +// ID: collectionID1, +// Schema: &schemapb.CollectionSchema{ +// Name: collectionName1, +// }, +// State: pb.CollectionState_CollectionCreated, +// ShardsNum: shardNum, +// ConsistencyLevel: level, +// Properties: []*commonpb.KeyValuePair{ +// kv, +// }, +// VirtualChannelNames: []string{"p1_v1", "p2_v1"}, +// StartPositions: []*commonpb.KeyDataPair{ +// { +// Key: "p1", +// Data: []byte("foo1"), +// }, +// { +// Key: "p2", +// Data: []byte("foo2"), +// }, +// }, +// } +// byte1, _ := proto.Marshal(info1) +// +// getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { +// if s == collectionPrefix+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{}, +// }, nil +// } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { +// return &clientv3.GetResponse{ +// Kvs: []*mvccpb.KeyValue{ +// { +// Value: fieldByte3, +// }, +// }, +// }, nil +// } +// return nil, errors.New("get error") +// }) +// watchChan := make(chan clientv3.WatchResponse, 10) +// var onlyReadChan clientv3.WatchChan = watchChan +// watchCall := mockEtcdCli.EXPECT().Watch(mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) clientv3.WatchChan { +// if s != collectionPrefix+"/" { +// closeChan := make(chan clientv3.WatchResponse, 10) +// close(closeChan) +// return closeChan +// } +// return onlyReadChan +// }) +// //watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan) +// asuccessCall := monitor.On("OnSuccessGetACollectionInfo", collectionID1, collectionName1).Return() +// filterCall := monitor.On("OnFilterReadMsg", "Delete").Return() +// msgStream1.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything).Return() +// msgStream2.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything).Return() +// msgStream1.EXPECT().Close().Return() +// msgStream2.EXPECT().Close().Return() +// msgStream1.EXPECT().Seek(mock.Anything).RunAndReturn(func(positions []*msgstream.MsgPosition) error { +// if positions[0].ChannelName == "p1" { +// assert.EqualValues(t, []byte("hello"), positions[0].MsgID) +// return nil +// } +// return errors.New("consume error") +// }) +// msgStream2.EXPECT().Seek(mock.Anything).RunAndReturn(func(positions []*msgstream.MsgPosition) error { +// if 
positions[0].ChannelName == "p2" { +// assert.EqualValues(t, []byte("foo2"), positions[0].MsgID) +// return nil +// } +// return errors.New("consume error") +// }) +// ch1 := make(chan *msgstream.MsgPack, 10) +// ch2 := make(chan *msgstream.MsgPack, 10) +// msgStream1.EXPECT().Chan().Return(ch1) +// msgStream2.EXPECT().Chan().Return(ch2) +// defer func() { +// getCall.Unset() +// watchCall.Unset() +// asuccessCall.Unset() +// filterCall.Unset() +// }() +// +// var options []config.Option[*reader.CollectionReader] +// options = append(options, reader.CollectionInfoOption(collectionName1, map[string]*commonpb.KeyDataPair{ +// "p1": {Key: "p1", Data: []byte("hello")}, +// "p2": {Key: "p2", Data: []byte("foo2")}, +// })) +// collectionReader, err := reader.NewCollectionReader(append(options, +// reader.DBOption(int64(0)), +// reader.FactoryCreatorOption(factoryCreator), +// reader.EtcdOption(etcdConfig), +// reader.MqOption(pulsarConfig, config.KafkaConfig{}), +// reader.MonitorOption(monitor), +// reader.ChanLenOption(10))...) +// assert.NoError(t, err) +// cdcChan := collectionReader.StartRead(context.Background()) +// ch1 <- &msgstream.MsgPack{ +// Msgs: []msgstream.TsMsg{ +// &msgstream.TimeTickMsg{ +// TimeTickMsg: msgpb.TimeTickMsg{Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_TimeTick}}, +// }, +// &msgstream.InsertMsg{ +// InsertRequest: msgpb.InsertRequest{ +// Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Insert}, +// CollectionName: collectionName1, +// CollectionID: collectionID1, +// }, +// }, +// &msgstream.DropCollectionMsg{ +// DropCollectionRequest: msgpb.DropCollectionRequest{ +// Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_DropCollection}, +// CollectionName: collectionName1, +// CollectionID: collectionID1, +// }, +// }, +// }, +// } +// ch2 <- &msgstream.MsgPack{ +// Msgs: []msgstream.TsMsg{ +// &msgstream.InsertMsg{ +// InsertRequest: msgpb.InsertRequest{ +// Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete}, +// CollectionName: "xxxxx", +// }, +// }, +// &msgstream.DeleteMsg{ +// DeleteRequest: msgpb.DeleteRequest{ +// Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete}, +// CollectionName: collectionName1, +// CollectionID: collectionID1, +// }, +// }, +// &msgstream.DropCollectionMsg{ +// DropCollectionRequest: msgpb.DropCollectionRequest{ +// Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_DropCollection}, +// CollectionName: collectionName1, +// CollectionID: collectionID1, +// }, +// }, +// }, +// } +// +// watchChan <- clientv3.WatchResponse{ +// Events: []*clientv3.Event{ +// { +// Type: clientv3.EventTypePut, +// Kv: &mvccpb.KeyValue{ +// Key: []byte(collectionPrefix + "/" + strconv.FormatInt(collectionID1, 10)), +// Value: byte1, +// }, +// }, +// }, +// } +// +// time.Sleep(time.Second) +// collectionReader.QuitRead(context.Background()) +// +// //create message +// cdcData := <-cdcChan +// util.Log.Info("xxxxxxxxx") +// assert.EqualValues(t, shardNum, cdcData.Extra[model.ShardNumKey]) +// assert.EqualValues(t, level, cdcData.Extra[model.ConsistencyLevelKey]) +// receiveKv := cdcData.Extra[model.CollectionPropertiesKey].([]*commonpb.KeyValuePair)[0] +// assert.EqualValues(t, kv.Key, receiveKv.Key) +// assert.EqualValues(t, kv.Value, receiveKv.Value) +// createCollectionMsg := cdcData.Msg.(*msgstream.CreateCollectionMsg) +// assert.EqualValues(t, collectionID1, createCollectionMsg.CollectionID) +// assert.EqualValues(t, collectionName1, createCollectionMsg.CollectionName) +// schema := &schemapb.CollectionSchema{ +// Name: 
collectionName1, +// Fields: []*schemapb.FieldSchema{field}, +// } +// schemaByte, _ := json.Marshal(schema) +// assert.EqualValues(t, schemaByte, createCollectionMsg.Schema) +// +// hasInsert, hasDelete := false, false +// checkInsertOrDelete := func(d *model.CDCData) { +// if insertMsg, ok := d.Msg.(*msgstream.InsertMsg); ok { +// hasInsert = true +// assert.Equal(t, collectionName1, insertMsg.CollectionName) +// return +// } +// if deleteMsg, ok := d.Msg.(*msgstream.DeleteMsg); ok { +// hasDelete = true +// assert.Equal(t, collectionName1, deleteMsg.CollectionName) +// return +// } +// } +// cdcData = <-cdcChan +// checkInsertOrDelete(cdcData) +// cdcData = <-cdcChan +// checkInsertOrDelete(cdcData) +// assert.True(t, hasInsert) +// assert.True(t, hasDelete) +// +// cdcData = <-cdcChan +// dropMsg := cdcData.Msg.(*msgstream.DropCollectionMsg) +// assert.Equal(t, collectionName1, dropMsg.CollectionName) +// assert.Len(t, cdcData.Extra[model.DropCollectionMsgsKey], 1) +// }) +// }) +// } diff --git a/core/reader/config_option.go b/core/reader/config_option.go index 66541d0d..e57d100e 100644 --- a/core/reader/config_option.go +++ b/core/reader/config_option.go @@ -16,120 +16,121 @@ package reader -import ( - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" - "github.com/zilliztech/milvus-cdc/core/config" -) - -func CollectionInfoOption(collectionName string, positions map[string]*commonpb.KeyDataPair) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.collections = append(object.collections, CollectionInfo{ - collectionName: collectionName, - positions: positions, - }) - }) -} - -func KafKaOption(options ...config.Option[*config.KafkaConfig]) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.mqConfig = config.MilvusMQConfig{Kafka: config.NewKafkaConfig(options...)} - }) -} - -func PulsarOption(options ...config.Option[*config.PulsarConfig]) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.mqConfig = config.MilvusMQConfig{Pulsar: config.NewPulsarConfig(options...)} - }) -} - -func MqOption(p config.PulsarConfig, k config.KafkaConfig) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.mqConfig = config.MilvusMQConfig{Pulsar: p, Kafka: k} - }) -} - -func EtcdOption(c config.MilvusEtcdConfig) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.etcdConfig = c - }) -} - -// MonitorOption the implement object of Monitor should include DefaultMonitor for the better compatibility -func MonitorOption(m Monitor) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.monitor = m - }) -} - -func ChanLenOption(l int) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - if l > 0 { - object.dataChanLen = l - } - }) -} - -func FactoryCreatorOption(f FactoryCreator) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - if f != nil { - object.factoryCreator = 
f - } - }) -} - -func ShouldReadFuncOption(f ShouldReadFunc) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - if f != nil { - object.shouldReadFunc = f - } - }) -} - -func DBOption(db int64) config.Option[*MilvusCollectionReader] { - return config.OptionFunc[*MilvusCollectionReader](func(object *MilvusCollectionReader) { - object.dbID = db - }) -} - -func MqChannelOption(p config.PulsarConfig, k config.KafkaConfig) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - object.mqConfig = config.MilvusMQConfig{Pulsar: p, Kafka: k} - }) -} - -func FactoryChannelOption(f FactoryCreator) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - if f != nil { - object.factoryCreator = f - } - }) -} - -func ChannelNameOption(c string) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - if c != "" { - object.channelName = c - } - }) -} - -func SubscriptionPositionChannelOption(p mqwrapper.SubscriptionInitialPosition) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - object.subscriptionPosition = p - }) -} - -func SeekPositionChannelOption(p string) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - object.seekPosition = p - }) -} - -func DataChanChannelOption(l int) config.Option[*ChannelReader] { - return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { - if l > 0 { - object.dataChanLen = l - } - }) -} +// import ( +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" +// +// "github.com/zilliztech/milvus-cdc/core/config" +// ) +// +// func CollectionInfoOption(collectionName string, positions map[string]*commonpb.KeyDataPair) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.collections = append(object.collections, CollectionInfo{ +// collectionName: collectionName, +// positions: positions, +// }) +// }) +// } +// +// func KafKaOption(options ...config.Option[*config.KafkaConfig]) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.mqConfig = config.MilvusMQConfig{Kafka: config.NewKafkaConfig(options...)} +// }) +// } +// +// func PulsarOption(options ...config.Option[*config.PulsarConfig]) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.mqConfig = config.MilvusMQConfig{Pulsar: config.NewPulsarConfig(options...)} +// }) +// } +// +// func MqOption(p config.PulsarConfig, k config.KafkaConfig) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.mqConfig = config.MilvusMQConfig{Pulsar: p, Kafka: k} +// }) +// } +// +// func EtcdOption(c config.MilvusEtcdConfig) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.etcdConfig = c +// }) +// } +// +// // MonitorOption the implement object of Monitor should include DefaultMonitor for the better compatibility +// func MonitorOption(m Monitor) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object 
*CollectionReader) { +// object.monitor = m +// }) +// } +// +// func ChanLenOption(l int) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// if l > 0 { +// object.dataChanLen = l +// } +// }) +// } +// +// func FactoryCreatorOption(f FactoryCreator) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// if f != nil { +// object.factoryCreator = f +// } +// }) +// } +// +// func ShouldReadFuncOption(f ShouldReadFunc) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// if f != nil { +// object.shouldReadFunc = f +// } +// }) +// } +// +// func DBOption(db int64) config.Option[*CollectionReader] { +// return config.OptionFunc[*CollectionReader](func(object *CollectionReader) { +// object.dbID = db +// }) +// } +// +// func MqChannelOption(p config.PulsarConfig, k config.KafkaConfig) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// object.mqConfig = config.MilvusMQConfig{Pulsar: p, Kafka: k} +// }) +// } +// +// func FactoryChannelOption(f FactoryCreator) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// if f != nil { +// object.factoryCreator = f +// } +// }) +// } +// +// func ChannelNameOption(c string) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// if c != "" { +// object.channelName = c +// } +// }) +// } +// +// func SubscriptionPositionChannelOption(p mqwrapper.SubscriptionInitialPosition) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// object.subscriptionPosition = p +// }) +// } +// +// func SeekPositionChannelOption(p string) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// object.seekPosition = p +// }) +// } +// +// func DataChanChannelOption(l int) config.Option[*ChannelReader] { +// return config.OptionFunc[*ChannelReader](func(object *ChannelReader) { +// if l > 0 { +// object.dataChanLen = l +// } +// }) +// } diff --git a/core/reader/data_barrier.go b/core/reader/data_barrier.go index 8b854cdf..01f8576f 100644 --- a/core/reader/data_barrier.go +++ b/core/reader/data_barrier.go @@ -20,10 +20,12 @@ import ( "fmt" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" + "go.uber.org/zap" + "github.com/zilliztech/milvus-cdc/core/model" "github.com/zilliztech/milvus-cdc/core/util" - "go.uber.org/zap" ) type MeetFunc func(m map[string]*model.CDCData) @@ -116,7 +118,7 @@ func (d *DataBarrierManager) addDropCollectionData(channelName string, data *mod dropCollectionCdcData.Extra[model.DropCollectionMsgsKey] = otherDropData d.sendFunc(dropCollectionCdcData) })) - //log.Info("drop collection debug", zap.Int64("collection_id", msg.CollectionID), zap.String("channel_name", channelName)) + // log.Info("drop collection debug", zap.Int64("collection_id", msg.CollectionID), zap.String("channel_name", channelName)) return barrier.AddData(channelName, data) } @@ -137,7 +139,7 @@ func (d *DataBarrierManager) addDropPartitionData(channelName string, data *mode dropPartitionCdcData.Extra[model.DropPartitionMsgsKey] = otherDropData d.sendFunc(dropPartitionCdcData) })) - //log.Info("drop partition debug", 
zap.Int64("collection_id", msg.CollectionID), + // log.Info("drop partition debug", zap.Int64("collection_id", msg.CollectionID), // zap.Int64("partition_id", msg.PartitionID), zap.String("partition_name", msg.PartitionName), // zap.String("channel_name", channelName)) return barrier.AddData(channelName, data) diff --git a/core/reader/etcd_op.go b/core/reader/etcd_op.go new file mode 100644 index 00000000..f4b33d5a --- /dev/null +++ b/core/reader/etcd_op.go @@ -0,0 +1,379 @@ +package reader + +import ( + "context" + "fmt" + "path" + "strconv" + "strings" + "sync" + "time" + + "github.com/cockroachdb/errors" + "github.com/golang/protobuf/proto" + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/retry" + clientv3 "go.etcd.io/etcd/client/v3" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" + "github.com/zilliztech/milvus-cdc/core/pb" + "github.com/zilliztech/milvus-cdc/core/util" +) + +var _ api.MetaOp = (*EtcdOp)(nil) + +const ( + collectionPrefix = "root-coord/database/collection-info" + partitionPrefix = "root-coord/partitions" + fieldPrefix = "root-coord/fields" +) + +type EtcdOp struct { + endpoints []string + rootPath string + metaSubPath string + defaultPartitionName string + etcdClient *clientv3.Client + collectionID2Name util.Map[int64, string] + watchCollectionOnce sync.Once + watchPartitionOnce sync.Once + + // task id -> api.CollectionFilter + subscribeCollectionEvent util.Map[string, api.CollectionEventConsumer] + subscribePartitionEvent util.Map[string, api.PartitionEventConsumer] +} + +func NewEtcdOp(endpoints []string, + rootPath, metaPath, defaultPartitionName string) (api.MetaOp, error) { + etcdOp := &EtcdOp{ + endpoints: endpoints, + rootPath: rootPath, + metaSubPath: metaPath, + defaultPartitionName: defaultPartitionName, + } + + // set default value + if len(endpoints) == 0 { + etcdOp.endpoints = []string{"127.0.0.1:2379"} + } + if rootPath == "" { + etcdOp.rootPath = "by-dev" + } + if metaPath == "" { + etcdOp.metaSubPath = "meta" + } + if defaultPartitionName == "" { + etcdOp.defaultPartitionName = "_default" + } + + var err error + log := log.With(zap.Strings("endpoints", endpoints)) + etcdOp.etcdClient, err = clientv3.New(clientv3.Config{ + Endpoints: etcdOp.endpoints, + DialTimeout: 5 * time.Second, + }) + if err != nil { + log.Warn("create etcd client failed", zap.Error(err)) + return nil, err + } + // check etcd status + timeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + _, err = etcdOp.etcdClient.Status(timeCtx, etcdOp.endpoints[0]) + if err != nil { + log.Warn("etcd status check failed", zap.Error(err)) + return nil, err + } + log.Debug("success to create etcd client") + + return etcdOp, nil +} + +func (e *EtcdOp) collectionPrefix() string { + return fmt.Sprintf("%s/%s/%s", e.rootPath, e.metaSubPath, collectionPrefix) +} + +func (e *EtcdOp) partitionPrefix() string { + return fmt.Sprintf("%s/%s/%s", e.rootPath, e.metaSubPath, partitionPrefix) +} + +func (e *EtcdOp) fieldPrefix() string { + return fmt.Sprintf("%s/%s/%s", e.rootPath, e.metaSubPath, fieldPrefix) +} + +func (e *EtcdOp) WatchCollection(ctx context.Context, filter api.CollectionFilter) { + e.watchCollectionOnce.Do(func() { + watchChan := e.etcdClient.Watch(ctx, e.collectionPrefix()+"/", clientv3.WithPrefix()) + go func() { + for { + select { + case watchResp, ok := <-watchChan: + if !ok { + log.Info("etcd watch 
collection channel closed") + return + } + for _, event := range watchResp.Events { + if event.Type != clientv3.EventTypePut { + log.Debug("collection watch event type is not put", zap.String("event type", event.Type.String())) + continue + } + collectionKey := util.ToString(event.Kv.Key) + info := &pb.CollectionInfo{} + err := proto.Unmarshal(event.Kv.Value, info) + if err != nil { + log.Warn("fail to unmarshal the collection info", zap.String("key", collectionKey), zap.String("value", util.Base64Encode(event.Kv.Value)), zap.Error(err)) + continue + } + if info.State != pb.CollectionState_CollectionCreated { + log.Info("the collection is not created", zap.String("key", collectionKey), zap.String("state", info.State.String())) + continue + } + if filter != nil && filter(info) { + log.Info("the collection is filtered in the watch process", zap.String("key", collectionKey)) + continue + } + + err = retry.Do(ctx, func() error { + return e.fillCollectionField(info) + }, retry.Attempts(5), retry.Sleep(time.Second)) + if err != nil { + log.Warn("fail to fill collection field in the watch process", zap.String("key", collectionKey), zap.Error(err)) + continue + } + log.Debug("get a new collection in the watch process", zap.String("key", collectionKey)) + e.collectionID2Name.Store(info.ID, info.Schema.Name) + e.subscribeCollectionEvent.Range(func(key string, value api.CollectionEventConsumer) bool { + if value != nil && value(info) { + log.Info("the collection has been consumed", zap.Int64("collection_id", info.ID), zap.String("task_id", key)) + return false + } + return true + }) + log.Info("the collection hasn't been consumed", zap.Int64("collection_id", info.ID)) + } + case <-ctx.Done(): + log.Info("watch collection context done") + return + } + } + }() + }) +} + +func (e *EtcdOp) SubscribeCollectionEvent(taskID string, consumer api.CollectionEventConsumer) { + e.subscribeCollectionEvent.Store(taskID, consumer) +} + +func (e *EtcdOp) SubscribePartitionEvent(taskID string, consumer api.PartitionEventConsumer) { + e.subscribePartitionEvent.Store(taskID, consumer) +} + +func (e *EtcdOp) UnsubscribeEvent(taskID string, eventType api.WatchEventType) { + switch eventType { + case api.CollectionEventType: + e.subscribeCollectionEvent.Delete(taskID) + case api.PartitionEventType: + e.subscribePartitionEvent.Delete(taskID) + default: + log.Warn("unknown event type", zap.String("taskID", taskID), zap.Any("eventType", eventType)) + } +} + +func (e *EtcdOp) WatchPartition(ctx context.Context, filter api.PartitionFilter) { + e.watchPartitionOnce.Do(func() { + watchChan := e.etcdClient.Watch(ctx, e.partitionPrefix()+"/", clientv3.WithPrefix()) + go func() { + for { + select { + case watchResp, ok := <-watchChan: + if !ok { + log.Info("etcd watch partition channel closed") + return + } + for _, event := range watchResp.Events { + if event.Type != clientv3.EventTypePut { + log.Debug("partition watch event type is not put", zap.String("event type", event.Type.String())) + continue + } + partitionKey := util.ToString(event.Kv.Key) + info := &pb.PartitionInfo{} + err := proto.Unmarshal(event.Kv.Value, info) + if err != nil { + log.Warn("fail to unmarshal the partition info", zap.String("key", partitionKey), zap.Error(err)) + continue + } + if info.State != pb.PartitionState_PartitionCreated || + info.PartitionName == e.defaultPartitionName { + log.Debug("partition state is not created or partition name is default", zap.String("partition name", info.PartitionName), zap.Any("state", info.State)) + continue + } + if 
filter != nil && filter(info) { + log.Info("partition filter", zap.String("partition name", info.PartitionName)) + continue + } + // TODO checkout it + log.Debug("partition info", zap.Int64("collection_id", info.CollectionId)) + + // collectionID := e.getCollectionIDFromPartitionKey(partitionKey) + // if collectionID == 0 { + // log.Warn("fail to get the collection id", zap.String("key", partitionKey)) + // continue + // } + // collectionName, ok := e.collectionID2Name.Load(collectionID) + // if !ok { + // collectionName = e.getCollectionNameByID(ctx, collectionID) + // if collectionName == "" { + // log.Warn("not found the collection", zap.Int64("collection_id", collectionID), + // zap.Int64("partition_id", info.PartitionID), + // zap.String("partition_name", info.PartitionName)) + // continue + // } + // } + + log.Debug("get a new partition in the watch process", zap.String("key", partitionKey)) + e.subscribePartitionEvent.Range(func(key string, value api.PartitionEventConsumer) bool { + if value != nil && value(info) { + log.Info("the partition has been consumed", zap.String("key", partitionKey), zap.String("task_id", key)) + return false + } + return true + }) + log.Info("the partition hasn't been consumed", zap.String("key", partitionKey)) + } + case <-ctx.Done(): + log.Info("watch partition context done") + return + } + } + }() + }) +} + +func (e *EtcdOp) getCollectionIDFromPartitionKey(key string) int64 { + subString := strings.Split(key[len(e.partitionPrefix())+1:], "/") + if len(subString) != 2 { + log.Warn("the key is invalid", zap.String("key", key), zap.Strings("sub", subString)) + return 0 + } + id, err := strconv.ParseInt(subString[0], 10, 64) + if err != nil { + log.Warn("fail to parse the collection id", zap.String("id", subString[0]), zap.Error(err)) + return 0 + } + return id +} + +func (e *EtcdOp) getCollectionNameByID(ctx context.Context, collectionID int64) string { + var ( + resp *clientv3.GetResponse + err error + ) + + // TODO the db should be considered, 1 is default db id + key := path.Join(e.collectionPrefix(), "1", strconv.FormatInt(collectionID, 10)) + resp, err = util.EtcdGetWithContext(ctx, e.etcdClient, key) + if err != nil { + log.Warn("fail to get the collection data", zap.Int64("collection_id", collectionID), zap.Error(err)) + return "" + } + if len(resp.Kvs) == 0 { + log.Warn("the collection isn't existed", zap.Int64("collection_id", collectionID)) + return "" + } + info := &pb.CollectionInfo{} + err = proto.Unmarshal(resp.Kvs[0].Value, info) + if err != nil { + log.Warn("fail to unmarshal collection info, maybe it's a deleted collection", + zap.Int64("collection_id", collectionID), + zap.String("value", util.Base64Encode(resp.Kvs[0].Value)), + zap.Error(err)) + return "" + } + collectionName := info.Schema.GetName() + e.collectionID2Name.Store(collectionID, collectionName) + + return collectionName +} + +func (e *EtcdOp) GetAllCollection(ctx context.Context, filter api.CollectionFilter) ([]*pb.CollectionInfo, error) { + resp, err := util.EtcdGetWithContext(ctx, e.etcdClient, e.collectionPrefix()+"/", clientv3.WithPrefix()) + if err != nil { + log.Warn("fail to get all collection data", zap.Error(err)) + return nil, err + } + var existedCollectionInfos []*pb.CollectionInfo + + for _, kv := range resp.Kvs { + info := &pb.CollectionInfo{} + err = proto.Unmarshal(kv.Value, info) + if err != nil { + log.Info("fail to unmarshal collection info, maybe it's a deleted collection", zap.String("key", util.ToString(kv.Key)), zap.String("value", 
util.Base64Encode(kv.Value)), zap.Error(err)) + continue + } + if info.State != pb.CollectionState_CollectionCreated { + log.Info("not created collection", zap.String("key", util.ToString(kv.Key))) + continue + } + if filter != nil && filter(info) { + log.Info("the collection info is filtered", zap.String("key", util.ToString(kv.Key))) + continue + } + err = e.fillCollectionField(info) + if err != nil { + log.Warn("fail to fill collection field", zap.String("key", util.ToString(kv.Key)), zap.Error(err)) + return nil, err + } + e.collectionID2Name.Store(info.ID, info.Schema.Name) + existedCollectionInfos = append(existedCollectionInfos, info) + } + return existedCollectionInfos, nil +} + +func (e *EtcdOp) fillCollectionField(info *pb.CollectionInfo) error { + prefix := path.Join(e.fieldPrefix(), strconv.FormatInt(info.ID, 10)) + "/" + resp, err := util.EtcdGet(e.etcdClient, prefix, clientv3.WithPrefix()) + log := log.With(zap.String("prefix", prefix)) + if err != nil { + log.Warn("fail to get the collection field data", zap.Error(err)) + return err + } + if len(resp.Kvs) == 0 { + msg := "not found the collection field data" + log.Warn(msg) + return errors.New(msg) + } + var fields []*schemapb.FieldSchema + for _, kv := range resp.Kvs { + field := &schemapb.FieldSchema{} + err = proto.Unmarshal(kv.Value, field) + if err != nil { + log.Warn("fail to unmarshal filed schema info", zap.String("key", util.ToString(kv.Key)), zap.Error(err)) + return err + } + if field.Name == common.MetaFieldName { + info.Schema.EnableDynamicField = true + continue + } + // if the field id is less than 100, it is a system field, skip it. + if field.FieldID < 100 { + continue + } + fields = append(fields, field) + } + info.Schema.Fields = fields + return nil +} + +func (e *EtcdOp) GetCollectionNameByID(ctx context.Context, id int64) string { + collectionName, ok := e.collectionID2Name.Load(id) + if !ok { + collectionName = e.getCollectionNameByID(ctx, id) + if collectionName == "" { + log.Warn("not found the collection", zap.Int64("collection_id", id)) + } + } + return collectionName +} diff --git a/core/reader/factory_api.go b/core/reader/factory_api.go index d0a55017..376c8e2b 100644 --- a/core/reader/factory_api.go +++ b/core/reader/factory_api.go @@ -21,20 +21,16 @@ import ( "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/util" ) -//go:generate mockery --name=FactoryCreator --filename=factory_creator_mock.go --output=../mocks --with-expecter type FactoryCreator interface { - util.CDCMark NewPmsFactory(cfg *config.PulsarConfig) msgstream.Factory NewKmsFactory(cfg *config.KafkaConfig) msgstream.Factory } -type DefaultFactoryCreator struct { - util.CDCMark -} +type DefaultFactoryCreator struct{} func NewDefaultFactoryCreator() FactoryCreator { return &DefaultFactoryCreator{} @@ -44,14 +40,16 @@ func (d *DefaultFactoryCreator) NewPmsFactory(cfg *config.PulsarConfig) msgstrea return msgstream.NewPmsFactory( ¶mtable.ServiceParam{ PulsarCfg: paramtable.PulsarConfig{ - Address: config.NewParamItem(cfg.Address), - WebAddress: config.NewParamItem(cfg.WebAddress), - WebPort: config.NewParamItem(strconv.Itoa(cfg.WebPort)), - MaxMessageSize: config.NewParamItem(cfg.MaxMessageSize), - AuthPlugin: config.NewParamItem(""), - AuthParams: config.NewParamItem("{}"), - Tenant: config.NewParamItem(cfg.Tenant), - Namespace: config.NewParamItem(cfg.Namespace), + Address: 
config.NewParamItem(cfg.Address), + WebAddress: config.NewParamItem(cfg.WebAddress), + WebPort: config.NewParamItem(strconv.Itoa(cfg.WebPort)), + MaxMessageSize: config.NewParamItem(cfg.MaxMessageSize), + AuthPlugin: config.NewParamItem(""), + AuthParams: config.NewParamItem("{}"), + Tenant: config.NewParamItem(cfg.Tenant), + Namespace: config.NewParamItem(cfg.Namespace), + RequestTimeout: config.NewParamItem("60"), + EnableClientMetrics: config.NewParamItem("false"), }, MQCfg: paramtable.MQConfig{ ReceiveBufSize: config.NewParamItem("16"), @@ -72,6 +70,7 @@ func (d *DefaultFactoryCreator) NewKmsFactory(cfg *config.KafkaConfig) msgstream SecurityProtocol: config.NewParamItem(""), ConsumerExtraConfig: config.NewParamGroup(), ProducerExtraConfig: config.NewParamGroup(), + ReadTimeout: config.NewParamItem("10"), }, MQCfg: paramtable.MQConfig{ ReceiveBufSize: config.NewParamItem("16"), diff --git a/core/reader/milvus_reader.go b/core/reader/milvus_reader.go deleted file mode 100644 index dec01c05..00000000 --- a/core/reader/milvus_reader.go +++ /dev/null @@ -1,633 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
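The factory creator does not decide which message queue to use; callers pick Pulsar or Kafka based on which address is configured, as NewReplicateChannelManager does later in this patch. A hedged sketch of that selection, assuming config.MQConfig exposes the Pulsar and Kafka sub-configs used elsewhere in the change:

package main

import (
	"errors"
	"fmt"

	"github.com/milvus-io/milvus/pkg/mq/msgstream"

	"github.com/zilliztech/milvus-cdc/core/config"
	"github.com/zilliztech/milvus-cdc/core/reader"
)

// newFactory picks the msgstream factory the same way the replicate channel
// manager does: Pulsar wins if configured, Kafka is the fallback.
func newFactory(cfg config.MQConfig) (msgstream.Factory, error) {
	creator := reader.NewDefaultFactoryCreator()
	switch {
	case cfg.Pulsar.Address != "":
		return creator.NewPmsFactory(&cfg.Pulsar), nil
	case cfg.Kafka.Address != "":
		return creator.NewKmsFactory(&cfg.Kafka), nil
	default:
		return nil, errors.New("fail to get the msg stream, check the mqConfig param")
	}
}

func main() {
	if _, err := newFactory(config.MQConfig{}); err != nil {
		fmt.Println(err)
	}
}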
- -package reader - -import ( - "context" - "encoding/json" - "errors" - "math/rand" - "path" - "strconv" - "strings" - "sync" - - "github.com/milvus-io/milvus/pkg/common" - - "github.com/golang/protobuf/proto" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" - "github.com/samber/lo" - "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/pb" - "github.com/zilliztech/milvus-cdc/core/util" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" -) - -var log = util.Log - -const ( - AllCollection = "*" -) - -type CollectionInfo struct { - collectionName string - positions map[string]*commonpb.KeyDataPair -} - -type ShouldReadFunc func(*pb.CollectionInfo) bool - -type MilvusCollectionReader struct { - DefaultReader - - etcdConfig config.MilvusEtcdConfig - mqConfig config.MilvusMQConfig - collections []CollectionInfo - monitor Monitor - dataChanLen int - - etcdCli util.KVApi - factoryCreator FactoryCreator - shouldReadFunc ShouldReadFunc - dataChan chan *model.CDCData - cancelWatch context.CancelFunc - collectionID2Name util.Map[int64, string] - closeStreamFuncs util.SafeArray[func()] - - startOnce sync.Once - quitOnce sync.Once - isQuit util.Value[bool] - - // Please no read or write it excluding in the beginning of readStreamData method - readingSteamCollection []int64 - readingLock sync.Mutex - - dbID int64 -} - -func NewMilvusCollectionReader(options ...config.Option[*MilvusCollectionReader]) (*MilvusCollectionReader, error) { - reader := &MilvusCollectionReader{ - monitor: NewDefaultMonitor(), - factoryCreator: NewDefaultFactoryCreator(), - dataChanLen: 10, - dbID: 1, - } - reader.shouldReadFunc = reader.getDefaultShouldReadFunc() - for _, option := range options { - option.Apply(reader) - } - var err error - reader.etcdCli, err = util.GetEtcdClient(reader.etcdConfig.Endpoints) - if err != nil { - log.Warn("fail to get etcd client", zap.Error(err)) - return nil, err - } - reader.dataChan = make(chan *model.CDCData, reader.dataChanLen) - reader.isQuit.Store(false) - return reader, nil -} - -func (reader *MilvusCollectionReader) StartRead(ctx context.Context) <-chan *model.CDCData { - reader.startOnce.Do(func() { - watchCtx, cancel := context.WithCancel(context.Background()) - reader.cancelWatch = cancel - - log.Info("start read collection") - - go reader.watchCollection(watchCtx) - go reader.watchPartition(watchCtx) - go reader.getAllCollections() - }) - - return reader.dataChan -} - -func (reader *MilvusCollectionReader) getDefaultShouldReadFunc() ShouldReadFunc { - return func(i *pb.CollectionInfo) bool { - return lo.ContainsBy(reader.collections, func(info CollectionInfo) bool { - return reader.collectionName(i) == info.collectionName - }) - } -} - -func (reader *MilvusCollectionReader) watchCollection(watchCtx context.Context) { - // watch collection prefix to avoid new collection while getting the all collection - // TODO improvement watch single instance - watchChan := reader.etcdCli.Watch(watchCtx, reader.collectionPrefix()+"/", clientv3.WithPrefix()) - for { - select { - case watchResp, ok := <-watchChan: - if !ok { - reader.monitor.WatchChanClosed() - return - } - lo.ForEach(watchResp.Events, func(event *clientv3.Event, _ int) { - if event.Type != clientv3.EventTypePut { - 
return - } - collectionKey := util.ToString(event.Kv.Key) - log.Info("collection key", zap.String("key", collectionKey)) - if !strings.HasPrefix(collectionKey, reader.collectionPrefix()) { - return - } - info := &pb.CollectionInfo{} - err := proto.Unmarshal(event.Kv.Value, info) - if err != nil { - log.Warn("fail to unmarshal the collection info", zap.String("key", collectionKey), zap.String("value", util.Base64Encode(event.Kv.Value)), zap.Error(err)) - reader.monitor.OnFailUnKnowCollection(collectionKey, err) - return - } - if info.State == pb.CollectionState_CollectionCreated { - go func() { - log.Info("collection key created", zap.String("key", collectionKey)) - if reader.shouldReadFunc(info) { - log.Info("collection key should created", zap.String("key", collectionKey)) - reader.collectionID2Name.Store(info.ID, reader.collectionName(info)) - err := util.Do(context.Background(), func() error { - err := reader.fillCollectionField(info) - if err != nil { - log.Info("fail to get collection fields, retry...", zap.String("key", collectionKey), zap.Error(err)) - } - return err - }) - if err != nil { - log.Warn("fail to get collection fields", zap.String("key", collectionKey), zap.Error(err)) - reader.monitor.OnFailGetCollectionInfo(info.ID, reader.collectionName(info), err) - return - } - reader.readStreamData(info, true) - } - }() - } - }) - case <-watchCtx.Done(): - log.Info("watch collection context done") - return - } - } -} - -func (reader *MilvusCollectionReader) watchPartition(watchCtx context.Context) { - watchChan := reader.etcdCli.Watch(watchCtx, reader.partitionPrefix()+"/", clientv3.WithPrefix()) - for { - select { - case watchResp, ok := <-watchChan: - if !ok { - return - } - lo.ForEach(watchResp.Events, func(event *clientv3.Event, _ int) { - if event.Type != clientv3.EventTypePut { - return - } - partitionKey := util.ToString(event.Kv.Key) - if !strings.HasPrefix(partitionKey, reader.partitionPrefix()) { - return - } - id := reader.getCollectionIDFromPartitionKey(partitionKey) - if id == 0 { - log.Warn("fail to get the collection id", zap.String("key", partitionKey)) - return - } - info := &pb.PartitionInfo{} - err := proto.Unmarshal(event.Kv.Value, info) - if err != nil { - log.Warn("fail to unmarshal the partition info", zap.String("key", partitionKey), zap.String("value", util.Base64Encode(event.Kv.Value)), zap.Error(err)) - // TODO monitor - // reader.monitor.OnFailUnKnowCollection(collectionKey, err) - return - } - if info.State == pb.PartitionState_PartitionCreated && - info.PartitionName != reader.etcdConfig.DefaultPartitionName { - collectionName, ok := reader.collectionID2Name.Load(id) - if !ok { - collectionName = reader.getCollectionNameByID(id) - if collectionName == "" { - log.Warn("not found the collection", zap.Int64("collection_id", id), - zap.Int64("partition_id", info.PartitionID), - zap.String("partition_name", info.PartitionName)) - return - } - } - data := &model.CDCData{ - Msg: &msgstream.CreatePartitionMsg{ - BaseMsg: msgstream.BaseMsg{}, - CreatePartitionRequest: msgpb.CreatePartitionRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_CreatePartition, - }, - CollectionName: collectionName, - PartitionName: info.PartitionName, - CollectionID: info.CollectionId, - PartitionID: info.PartitionID, - }, - }, - } - reader.sendData(data) - } - }) - case <-watchCtx.Done(): - log.Info("watch partition context done") - return - } - } -} - -func (reader *MilvusCollectionReader) getCollectionNameByID(collectionID int64) string { - var ( - resp 
*clientv3.GetResponse - err error - ) - - if reader.dbID == 0 { - resp, err = util.EtcdGet(reader.etcdCli, path.Join(reader.collectionPrefix(), strconv.FormatInt(collectionID, 10))) - } else { - resp, err = util.EtcdGet(reader.etcdCli, path.Join(reader.collectionPrefix(), strconv.FormatInt(reader.dbID, 10), strconv.FormatInt(collectionID, 10))) - } - if err != nil { - log.Warn("fail to get all collection data", zap.Int64("collection_id", collectionID), zap.Error(err)) - return "" - } - if len(resp.Kvs) == 0 { - log.Warn("the collection isn't existed", zap.Int64("collection_id", collectionID)) - return "" - } - info := &pb.CollectionInfo{} - err = proto.Unmarshal(resp.Kvs[0].Value, info) - if err != nil { - log.Warn("fail to unmarshal collection info, maybe it's a deleted collection", - zap.Int64("collection_id", collectionID), - zap.String("value", util.Base64Encode(resp.Kvs[0].Value)), - zap.Error(err)) - return "" - } - collectionName := reader.collectionName(info) - if reader.shouldReadFunc(info) { - reader.collectionID2Name.Store(collectionID, collectionName) - return collectionName - } - log.Warn("the collection can't be read", zap.Int64("id", collectionID), zap.String("name", collectionName)) - return "" -} - -func (reader *MilvusCollectionReader) getAllCollections() { - var ( - existedCollectionInfos []*pb.CollectionInfo - err error - ) - - existedCollectionInfos, err = reader.getCollectionInfo() - if err != nil { - log.Warn("fail to get collection", zap.Error(err)) - reader.monitor.OnFailUnKnowCollection(reader.collectionPrefix(), err) - } - for _, info := range existedCollectionInfos { - if info.State == pb.CollectionState_CollectionCreated { - go reader.readStreamData(info, false) - } - } -} - -func (reader *MilvusCollectionReader) collectionPrefix() string { - c := reader.etcdConfig - collectionKey := c.CollectionKey - if reader.dbID != 0 { - collectionKey = c.CollectionWithDBKey - } - return util.GetCollectionPrefix(c.RootPath, c.MetaSubPath, collectionKey) -} - -func (reader *MilvusCollectionReader) partitionPrefix() string { - c := reader.etcdConfig - return util.GetPartitionPrefix(c.RootPath, c.MetaSubPath, c.PartitionKey) -} - -func (reader *MilvusCollectionReader) fieldPrefix() string { - c := reader.etcdConfig - return util.GetFieldPrefix(c.RootPath, c.MetaSubPath, c.FiledKey) -} - -func (reader *MilvusCollectionReader) collectionName(info *pb.CollectionInfo) string { - return info.Schema.Name -} - -func (reader *MilvusCollectionReader) getCollectionIDFromPartitionKey(key string) int64 { - subStrs := strings.Split(key[len(reader.partitionPrefix())+1:], "/") - if len(subStrs) != 2 { - log.Warn("the key is invalid", zap.String("key", key), zap.Strings("sub", subStrs)) - return 0 - } - id, err := strconv.ParseInt(subStrs[0], 10, 64) - if err != nil { - log.Warn("fail to parse the collection id", zap.String("id", subStrs[0]), zap.Error(err)) - return 0 - } - return id -} - -// getCollectionInfo The return value meanings are respectively: -// 1. collection infos that the collection have existed -// 2. 
error message -func (reader *MilvusCollectionReader) getCollectionInfo() ([]*pb.CollectionInfo, error) { - resp, err := util.EtcdGet(reader.etcdCli, reader.collectionPrefix()+"/", clientv3.WithPrefix()) - if err != nil { - log.Warn("fail to get all collection data", zap.Error(err)) - return nil, err - } - var existedCollectionInfos []*pb.CollectionInfo - - for _, kv := range resp.Kvs { - info := &pb.CollectionInfo{} - err = proto.Unmarshal(kv.Value, info) - if err != nil { - log.Warn("fail to unmarshal collection info, maybe it's a deleted collection", zap.String("key", util.ToString(kv.Key)), zap.String("value", util.Base64Encode(kv.Value)), zap.Error(err)) - continue - } - if reader.shouldReadFunc(info) { - reader.collectionID2Name.Store(info.ID, reader.collectionName(info)) - log.Info("get the collection that it need to be replicated", zap.String("name", reader.collectionName(info)), zap.String("key", util.ToString(kv.Key))) - err = reader.fillCollectionField(info) - if err != nil { - return existedCollectionInfos, err - } - existedCollectionInfos = append(existedCollectionInfos, info) - } - } - return existedCollectionInfos, nil -} - -func (reader *MilvusCollectionReader) fillCollectionField(info *pb.CollectionInfo) error { - filedPrefix := reader.fieldPrefix() - prefix := path.Join(filedPrefix, strconv.FormatInt(info.ID, 10)) + "/" - resp, err := util.EtcdGet(reader.etcdCli, prefix, clientv3.WithPrefix()) - if err != nil { - log.Warn("fail to get the collection field data", zap.String("prefix", prefix), zap.Error(err)) - return err - } - if len(resp.Kvs) == 0 { - err = errors.New("not found the collection field data") - log.Warn(err.Error(), zap.String("prefix", filedPrefix)) - return err - } - var fields []*schemapb.FieldSchema - for _, kv := range resp.Kvs { - field := &schemapb.FieldSchema{} - err = proto.Unmarshal(kv.Value, field) - if err != nil { - log.Warn("fail to unmarshal filed schema info", - zap.String("key", util.ToString(kv.Key)), zap.String("value", util.Base64Encode(kv.Value)), zap.Error(err)) - return err - } - if field.Name == common.MetaFieldName { - info.Schema.EnableDynamicField = true - continue - } - if field.FieldID >= 100 { - fields = append(fields, field) - } - } - info.Schema.Fields = fields - return nil -} - -func (reader *MilvusCollectionReader) readStreamData(info *pb.CollectionInfo, sendCreateMsg bool) { - isRepeatCollection := func(id int64) bool { - reader.readingLock.Lock() - defer reader.readingLock.Unlock() - - if lo.Contains(reader.readingSteamCollection, id) { - return true - } - reader.readingSteamCollection = append(reader.readingSteamCollection, id) - return false - } - if isRepeatCollection(info.ID) { - return - } - reader.monitor.OnSuccessGetACollectionInfo(info.ID, reader.collectionName(info)) - - if sendCreateMsg { - schemaByte, err := json.Marshal(info.Schema) - if err != nil { - log.Warn("fail to marshal the collection schema", zap.Error(err)) - reader.monitor.OnFailReadStream(info.ID, reader.collectionName(info), "unknown", err) - return - } - createCollectionMsg := &msgstream.CreateCollectionMsg{ - BaseMsg: msgstream.BaseMsg{ - HashValues: []uint32{0}, - }, - CreateCollectionRequest: msgpb.CreateCollectionRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_CreateCollection, - }, - CollectionName: reader.collectionName(info), - CollectionID: info.ID, - Schema: schemaByte, - }, - } - reader.sendData(&model.CDCData{ - Msg: createCollectionMsg, - Extra: map[string]any{ - model.ShardNumKey: info.ShardsNum, - 
model.ConsistencyLevelKey: info.ConsistencyLevel, - model.CollectionPropertiesKey: info.Properties, - }, - }) - } - - vchannels := info.VirtualChannelNames - barrierManager := NewDataBarrierManager(len(vchannels), reader.sendData) - log.Info("read vchannels", zap.Strings("channels", vchannels)) - for _, vchannel := range vchannels { - position, err := reader.collectionPosition(info, vchannel) - handleError := func() { - reader.monitor.OnFailReadStream(info.ID, reader.collectionName(info), vchannel, err) - reader.isQuit.Store(true) - } - if err != nil { - log.Warn("fail to find the collection position", zap.String("vchannel", vchannel), zap.Error(err)) - handleError() - return - } - stream, err := reader.msgStream() - if err != nil { - log.Warn("fail to new message stream", zap.String("vchannel", vchannel), zap.Error(err)) - handleError() - return - } - msgChan, err := reader.msgStreamChan(vchannel, position, stream) - if err != nil { - stream.Close() - log.Warn("fail to get message stream chan", zap.String("vchannel", vchannel), zap.Error(err)) - handleError() - return - } - reader.closeStreamFuncs.Append(stream.Close) - go reader.readMsg(reader.collectionName(info), info.ID, vchannel, msgChan, barrierManager) - } -} - -func (reader *MilvusCollectionReader) collectionPosition(info *pb.CollectionInfo, vchannelName string) (*msgstream.MsgPosition, error) { - pchannel := util.ToPhysicalChannel(vchannelName) - for _, collection := range reader.collections { - if collection.collectionName == reader.collectionName(info) && - collection.positions != nil { - if pair, ok := collection.positions[pchannel]; ok { - return &msgstream.MsgPosition{ - ChannelName: vchannelName, - MsgID: pair.GetData(), - }, nil - } - } - } - // return util.GetChannelStartPosition(vchannelName, info.StartPositions) - return nil, nil -} - -func (reader *MilvusCollectionReader) msgStream() (msgstream.MsgStream, error) { - var factory msgstream.Factory - if reader.mqConfig.Pulsar.Address != "" { - factory = reader.factoryCreator.NewPmsFactory(&reader.mqConfig.Pulsar) - } else if reader.mqConfig.Kafka.Address != "" { - factory = reader.factoryCreator.NewKmsFactory(&reader.mqConfig.Kafka) - } else { - return nil, errors.New("fail to get the msg stream, check the mqConfig param") - } - stream, err := factory.NewMsgStream(context.Background()) - if err != nil { - log.Warn("fail to new the msg stream", zap.Error(err)) - } - return stream, err -} - -func (reader *MilvusCollectionReader) msgStreamChan(vchannel string, position *msgstream.MsgPosition, stream msgstream.MsgStream) (<-chan *msgstream.MsgPack, error) { - consumeSubName := vchannel + string(rand.Int31()) - pchannelName := util.ToPhysicalChannel(vchannel) - stream.AsConsumer(context.Background(), []string{pchannelName}, consumeSubName, mqwrapper.SubscriptionPositionLatest) - if position == nil { - return stream.Chan(), nil - } - position.ChannelName = pchannelName - err := stream.Seek(context.Background(), []*msgstream.MsgPosition{position}) - if err != nil { - stream.Close() - log.Warn("fail to seek the msg position", zap.String("vchannel", vchannel), zap.Error(err)) - return nil, err - } - - return stream.Chan(), nil -} - -func (reader *MilvusCollectionReader) readMsg(collectionName string, collectionID int64, vchannelName string, - c <-chan *msgstream.MsgPack, - barrierManager *DataBarrierManager) { - for { - if reader.isQuit.Load() && barrierManager.IsEmpty() { - return - } - msgPack := <-c - if msgPack == nil { - return - } - for _, msg := range msgPack.Msgs { - 
msgType := msg.Type() - if reader.filterMsgType(msgType) { - continue - } - log.Info("msgType", zap.Any("msg_type", msgType)) - if reader.filterMsg(collectionName, collectionID, msg) { - continue - } - data := &model.CDCData{ - Msg: msg, - } - if barrierManager.IsBarrierData(data) { - if dropPartitionMsg, ok := msg.(*msgstream.DropPartitionMsg); ok { - dropPartitionMsg.CollectionName = collectionName - } - barrierManager.AddData(vchannelName, data) - if _, ok := msg.(*msgstream.DropCollectionMsg); ok { - return - } - continue - } - reader.sendData(&model.CDCData{ - Msg: msg, - Extra: map[string]any{ - model.CollectionIDKey: collectionID, - model.CollectionNameKey: collectionName, - }, - }) - } - } -} - -func (reader *MilvusCollectionReader) filterMsgType(msgType commonpb.MsgType) bool { - return msgType == commonpb.MsgType_TimeTick -} - -func (reader *MilvusCollectionReader) filterMsg(collectionName string, collectionID int64, msg msgstream.TsMsg) bool { - if x, ok := msg.(interface{ GetCollectionName() string }); ok { - notEqual := x.GetCollectionName() != collectionName - if notEqual { - log.Warn("filter msg", - zap.String("current_collection_name", collectionName), - zap.String("msg_collection_name", x.GetCollectionName()), - zap.Any("msg_type", msg.Type())) - reader.monitor.OnFilterReadMsg(msg.Type().String()) - } - return notEqual - } - if y, ok := msg.(interface{ GetCollectionID() int64 }); ok { - notEqual := y.GetCollectionID() != collectionID - if notEqual { - log.Warn("filter msg", - zap.Int64("current_collection_id", collectionID), - zap.Int64("msg_collection_name", y.GetCollectionID()), - zap.Any("msg_type", msg.Type())) - reader.monitor.OnFilterReadMsg(msg.Type().String()) - } - return notEqual - } - return true -} - -func (reader *MilvusCollectionReader) CancelWatchCollection() { - if reader.cancelWatch != nil { - reader.cancelWatch() - } -} - -func (reader *MilvusCollectionReader) QuitRead(ctx context.Context) { - reader.quitOnce.Do(func() { - reader.isQuit.Store(true) - reader.CancelWatchCollection() - reader.closeStreamFuncs.Range(func(_ int, value func()) bool { - value() - return true - }) - }) -} - -func (reader *MilvusCollectionReader) sendData(data *model.CDCData) { - reader.dataChan <- data -} diff --git a/core/reader/milvus_reader_test.go b/core/reader/milvus_reader_test.go deleted file mode 100644 index ecbb489b..00000000 --- a/core/reader/milvus_reader_test.go +++ /dev/null @@ -1,592 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package reader_test - -import ( - "context" - "fmt" - "strconv" - "testing" - "time" - - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - - "github.com/goccy/go-json" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/zilliztech/milvus-cdc/core/model" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "go.etcd.io/etcd/api/v3/mvccpb" - - "github.com/golang/protobuf/proto" - "github.com/zilliztech/milvus-cdc/core/pb" - - "github.com/cockroachdb/errors" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/zilliztech/milvus-cdc/core/util" - clientv3 "go.etcd.io/etcd/client/v3" - - "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/mocks" - "github.com/zilliztech/milvus-cdc/core/reader" -) - -var ( - endpoints = []string{"localhost:2379"} - rootPath = "dev" - metaSubPath = "meta" - collectionKey = "root-coord/collection" - filedKey = "root-coord/fields" - collectionPrefix = util.GetCollectionPrefix(rootPath, metaSubPath, collectionKey) - fieldPrefix = util.GetFieldPrefix(rootPath, metaSubPath, filedKey) - etcdConfig = config.NewMilvusEtcdConfig(config.MilvusEtcdEndpointsOption(endpoints), - config.MilvusEtcdRootPathOption(rootPath), - config.MilvusEtcdMetaSubPathOption(metaSubPath)) - pulsarConfig = config.NewPulsarConfig( - config.PulsarAddressOption(fmt.Sprintf("pulsar://%s:%d", "localhost", 6650)), - config.PulsarWebAddressOption("", 80), - config.PulsarMaxMessageSizeOption(5242880), - config.PulsarTenantOption("public", "default"), - ) - kafkaConfig = config.NewKafkaConfig(config.KafkaAddressOption("localhost:9092")) -) - -func TestNewMilvusCollectionReader(t *testing.T) { - monitor := mocks.NewMonitor(t) - var options []config.Option[*reader.MilvusCollectionReader] - mockEtcdCli := mocks.NewKVApi(t) - mockEtcdCli.On("Endpoints").Return(endpoints) - - util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { - return mockEtcdCli, nil - }, func() { - call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(nil, errors.New("status error")) - defer call.Unset() - _, err := reader.NewMilvusCollectionReader(append(options, - reader.EtcdOption(etcdConfig), - reader.MqOption(config.PulsarConfig{}, config.KafkaConfig{Address: "address"}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) - assert.Error(t, err) - }) - - util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { - return mockEtcdCli, nil - }, func() { - call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) - defer call.Unset() - _, err := reader.NewMilvusCollectionReader(append(options, - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) 
- assert.NoError(t, err) - }) -} - -func TestReaderGetCollectionInfo(t *testing.T) { - mockEtcdCli := mocks.NewKVApi(t) - mockEtcdCli.On("Endpoints").Return(endpoints) - - util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { - return mockEtcdCli, nil - }, func() { - call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) - defer call.Unset() - collectionName1 := "coll1" - collectionID1 := int64(100) - collectionName2 := "coll2" - collectionID2 := int64(200) - - factoryCreator := mocks.NewFactoryCreator(t) - monitor := mocks.NewMonitor(t) - - util.EtcdOpRetryTime = 1 - defer func() { - util.EtcdOpRetryTime = 5 - }() - - t.Run("get error", func(t *testing.T) { - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, nil)) - options = append(options, reader.CollectionInfoOption(collectionName2, nil)) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) - assert.NoError(t, err) - getCall := mockEtcdCli.On("Get", mock.Anything, collectionPrefix+"/", mock.Anything).Return(nil, errors.New("get error")) - watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) - unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() - defer func() { - watchCall.Unset() - getCall.Unset() - unknownCall.Unset() - }() - collectionReader.StartRead(context.Background()) - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - monitor.AssertCalled(t, "OnFailUnKnowCollection", collectionPrefix, mock.Anything) - }) - - info1 := &pb.CollectionInfo{ - ID: collectionID1, - Schema: &schemapb.CollectionSchema{ - Name: collectionName1, - }, - } - byte1, _ := proto.Marshal(info1) - info2 := &pb.CollectionInfo{ - ID: collectionID2, - Schema: &schemapb.CollectionSchema{ - Name: collectionName2, - }, - } - byte2, _ := proto.Marshal(info2) - field := &schemapb.FieldSchema{ - FieldID: 101, - } - fieldByte3, _ := proto.Marshal(field) - - t.Run("field error", func(t *testing.T) { - getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { - if s == collectionPrefix+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: byte2, - }, - { - Value: byte1, - }, - }, - }, nil - } - return nil, errors.New("get error") - }) - watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) - unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() - defer func() { - getCall.Unset() - watchCall.Unset() - unknownCall.Unset() - }() - - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, nil)) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) 
- assert.NoError(t, err) - collectionReader.StartRead(context.Background()) - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - }) - - t.Run("no field error", func(t *testing.T) { - getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { - if s == collectionPrefix+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: byte2, - }, - { - Value: byte1, - }, - }, - }, nil - } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{}, - }, nil - } - return nil, errors.New("get error") - }) - watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) - unknownCall := monitor.On("OnFailUnKnowCollection", collectionPrefix, mock.Anything).Return() - defer func() { - getCall.Unset() - watchCall.Unset() - unknownCall.Unset() - }() - - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, nil)) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) - assert.NoError(t, err) - collectionReader.StartRead(context.Background()) - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - }) - - t.Run("success", func(t *testing.T) { - getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { - if s == collectionPrefix+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: byte2, - }, - { - Value: byte1, - }, - }, - }, nil - } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: fieldByte3, - }, - }, - }, nil - } - return nil, errors.New("get error") - }) - watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(clientv3.WatchChan)) - asuccessCall := monitor.On("OnSuccessGetACollectionInfo", collectionID1, collectionName1).Return() - successCall := monitor.On("OnSuccessGetAllCollectionInfo").Return() - defer func() { - getCall.Unset() - watchCall.Unset() - asuccessCall.Unset() - successCall.Unset() - }() - - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, nil)) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) 
- assert.NoError(t, err) - collectionReader.StartRead(context.Background()) - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - }) - }) -} - -func TestReaderWatchCollectionInfo(t *testing.T) { - mockEtcdCli := mocks.NewKVApi(t) - mockEtcdCli.On("Endpoints").Return(endpoints) - call := mockEtcdCli.On("Status", mock.Anything, endpoints[0]).Return(&clientv3.StatusResponse{}, nil) - defer call.Unset() - collectionName1 := "coll1" - collectionID1 := int64(100) - factoryCreator := mocks.NewFactoryCreator(t) - field := &schemapb.FieldSchema{ - FieldID: 101, - } - fieldByte3, _ := proto.Marshal(field) - - util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { - return mockEtcdCli, nil - }, func() { - monitor := mocks.NewMonitor(t) - - t.Run("watch chan close", func(t *testing.T) { - getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { - if s == collectionPrefix+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{}, - }, nil - } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: fieldByte3, - }, - }, - }, nil - } - return nil, errors.New("get error") - }) - watchChan := make(chan clientv3.WatchResponse) - var onlyReadChan clientv3.WatchChan = watchChan - watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan) - closeCall := monitor.On("WatchChanClosed").Return() - - defer func() { - getCall.Unset() - watchCall.Unset() - closeCall.Unset() - }() - close(watchChan) - - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, nil)) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) 
- assert.NoError(t, err) - collectionReader.StartRead(context.Background()) - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - }) - }) - - util.MockEtcdClient(func(cfg clientv3.Config) (util.KVApi, error) { - return mockEtcdCli, nil - }, func() { - monitor := mocks.NewMonitor(t) - t.Run("send msg", func(t *testing.T) { - msgStream1 := mocks.NewMsgStream(t) - msgStream2 := mocks.NewMsgStream(t) - factory := mocks.NewFactory(t) - pmsCall := factoryCreator.EXPECT().NewPmsFactory(mock.Anything).RunAndReturn(func(c *config.PulsarConfig) msgstream.Factory { - return factory - }) - defer pmsCall.Unset() - i := 0 - factory.EXPECT().NewMsgStream(mock.Anything).RunAndReturn(func(ctx context.Context) (msgstream.MsgStream, error) { - if i == 0 { - i++ - return msgStream1, nil - } - return msgStream2, nil - }) - - shardNum := int32(4) - level := commonpb.ConsistencyLevel_Session - kv := &commonpb.KeyValuePair{Key: "foo", Value: "123"} - info1 := &pb.CollectionInfo{ - ID: collectionID1, - Schema: &schemapb.CollectionSchema{ - Name: collectionName1, - }, - State: pb.CollectionState_CollectionCreated, - ShardsNum: shardNum, - ConsistencyLevel: level, - Properties: []*commonpb.KeyValuePair{ - kv, - }, - VirtualChannelNames: []string{"p1_v1", "p2_v1"}, - StartPositions: []*commonpb.KeyDataPair{ - { - Key: "p1", - Data: []byte("foo1"), - }, - { - Key: "p2", - Data: []byte("foo2"), - }, - }, - } - byte1, _ := proto.Marshal(info1) - - getCall := mockEtcdCli.EXPECT().Get(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) (*clientv3.GetResponse, error) { - if s == collectionPrefix+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{}, - }, nil - } else if s == fieldPrefix+"/"+strconv.FormatInt(collectionID1, 10)+"/" { - return &clientv3.GetResponse{ - Kvs: []*mvccpb.KeyValue{ - { - Value: fieldByte3, - }, - }, - }, nil - } - return nil, errors.New("get error") - }) - watchChan := make(chan clientv3.WatchResponse, 10) - var onlyReadChan clientv3.WatchChan = watchChan - watchCall := mockEtcdCli.EXPECT().Watch(mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan).RunAndReturn(func(ctx context.Context, s string, option ...clientv3.OpOption) clientv3.WatchChan { - if s != collectionPrefix+"/" { - closeChan := make(chan clientv3.WatchResponse, 10) - close(closeChan) - return closeChan - } - return onlyReadChan - }) - //watchCall := mockEtcdCli.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(onlyReadChan) - asuccessCall := monitor.On("OnSuccessGetACollectionInfo", collectionID1, collectionName1).Return() - filterCall := monitor.On("OnFilterReadMsg", "Delete").Return() - msgStream1.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything).Return() - msgStream2.EXPECT().AsConsumer(mock.Anything, mock.Anything, mock.Anything).Return() - msgStream1.EXPECT().Close().Return() - msgStream2.EXPECT().Close().Return() - msgStream1.EXPECT().Seek(mock.Anything).RunAndReturn(func(positions []*msgstream.MsgPosition) error { - if positions[0].ChannelName == "p1" { - assert.EqualValues(t, []byte("hello"), positions[0].MsgID) - return nil - } - return errors.New("consume error") - }) - msgStream2.EXPECT().Seek(mock.Anything).RunAndReturn(func(positions []*msgstream.MsgPosition) error { - if positions[0].ChannelName == "p2" { - assert.EqualValues(t, []byte("foo2"), positions[0].MsgID) - return nil - } - return errors.New("consume error") - }) - ch1 := make(chan *msgstream.MsgPack, 10) - ch2 := 
make(chan *msgstream.MsgPack, 10) - msgStream1.EXPECT().Chan().Return(ch1) - msgStream2.EXPECT().Chan().Return(ch2) - defer func() { - getCall.Unset() - watchCall.Unset() - asuccessCall.Unset() - filterCall.Unset() - }() - - var options []config.Option[*reader.MilvusCollectionReader] - options = append(options, reader.CollectionInfoOption(collectionName1, map[string]*commonpb.KeyDataPair{ - "p1": {Key: "p1", Data: []byte("hello")}, - "p2": {Key: "p2", Data: []byte("foo2")}, - })) - collectionReader, err := reader.NewMilvusCollectionReader(append(options, - reader.DBOption(int64(0)), - reader.FactoryCreatorOption(factoryCreator), - reader.EtcdOption(etcdConfig), - reader.MqOption(pulsarConfig, config.KafkaConfig{}), - reader.MonitorOption(monitor), - reader.ChanLenOption(10))...) - assert.NoError(t, err) - cdcChan := collectionReader.StartRead(context.Background()) - ch1 <- &msgstream.MsgPack{ - Msgs: []msgstream.TsMsg{ - &msgstream.TimeTickMsg{ - TimeTickMsg: msgpb.TimeTickMsg{Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_TimeTick}}, - }, - &msgstream.InsertMsg{ - InsertRequest: msgpb.InsertRequest{ - Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Insert}, - CollectionName: collectionName1, - CollectionID: collectionID1, - }, - }, - &msgstream.DropCollectionMsg{ - DropCollectionRequest: msgpb.DropCollectionRequest{ - Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_DropCollection}, - CollectionName: collectionName1, - CollectionID: collectionID1, - }, - }, - }, - } - ch2 <- &msgstream.MsgPack{ - Msgs: []msgstream.TsMsg{ - &msgstream.InsertMsg{ - InsertRequest: msgpb.InsertRequest{ - Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete}, - CollectionName: "xxxxx", - }, - }, - &msgstream.DeleteMsg{ - DeleteRequest: msgpb.DeleteRequest{ - Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_Delete}, - CollectionName: collectionName1, - CollectionID: collectionID1, - }, - }, - &msgstream.DropCollectionMsg{ - DropCollectionRequest: msgpb.DropCollectionRequest{ - Base: &commonpb.MsgBase{MsgType: commonpb.MsgType_DropCollection}, - CollectionName: collectionName1, - CollectionID: collectionID1, - }, - }, - }, - } - - watchChan <- clientv3.WatchResponse{ - Events: []*clientv3.Event{ - { - Type: clientv3.EventTypePut, - Kv: &mvccpb.KeyValue{ - Key: []byte(collectionPrefix + "/" + strconv.FormatInt(collectionID1, 10)), - Value: byte1, - }, - }, - }, - } - - time.Sleep(time.Second) - collectionReader.QuitRead(context.Background()) - - //create message - cdcData := <-cdcChan - util.Log.Info("xxxxxxxxx") - assert.EqualValues(t, shardNum, cdcData.Extra[model.ShardNumKey]) - assert.EqualValues(t, level, cdcData.Extra[model.ConsistencyLevelKey]) - receiveKv := cdcData.Extra[model.CollectionPropertiesKey].([]*commonpb.KeyValuePair)[0] - assert.EqualValues(t, kv.Key, receiveKv.Key) - assert.EqualValues(t, kv.Value, receiveKv.Value) - createCollectionMsg := cdcData.Msg.(*msgstream.CreateCollectionMsg) - assert.EqualValues(t, collectionID1, createCollectionMsg.CollectionID) - assert.EqualValues(t, collectionName1, createCollectionMsg.CollectionName) - schema := &schemapb.CollectionSchema{ - Name: collectionName1, - Fields: []*schemapb.FieldSchema{field}, - } - schemaByte, _ := json.Marshal(schema) - assert.EqualValues(t, schemaByte, createCollectionMsg.Schema) - - hasInsert, hasDelete := false, false - checkInsertOrDelete := func(d *model.CDCData) { - if insertMsg, ok := d.Msg.(*msgstream.InsertMsg); ok { - hasInsert = true - assert.Equal(t, collectionName1, insertMsg.CollectionName) - return - } - 
if deleteMsg, ok := d.Msg.(*msgstream.DeleteMsg); ok { - hasDelete = true - assert.Equal(t, collectionName1, deleteMsg.CollectionName) - return - } - } - cdcData = <-cdcChan - checkInsertOrDelete(cdcData) - cdcData = <-cdcChan - checkInsertOrDelete(cdcData) - assert.True(t, hasInsert) - assert.True(t, hasDelete) - - cdcData = <-cdcChan - dropMsg := cdcData.Msg.(*msgstream.DropCollectionMsg) - assert.Equal(t, collectionName1, dropMsg.CollectionName) - assert.Len(t, cdcData.Extra[model.DropCollectionMsgsKey], 1) - }) - }) -} diff --git a/core/reader/monitor.go b/core/reader/monitor.go index dc055744..4c19e2ad 100644 --- a/core/reader/monitor.go +++ b/core/reader/monitor.go @@ -18,10 +18,7 @@ package reader import "github.com/zilliztech/milvus-cdc/core/util" -//go:generate mockery --name=Monitor --filename=monitor_mock.go --output=../mocks type Monitor interface { - util.CDCMark - OnFailUnKnowCollection(key string, err error) OnFailGetCollectionInfo(collectionID int64, collectionName string, err error) OnFailReadStream(collectionID int64, collectionName string, vchannel string, err error) diff --git a/core/reader/reader_api.go b/core/reader/reader_api.go index 502497d3..96ca546b 100644 --- a/core/reader/reader_api.go +++ b/core/reader/reader_api.go @@ -16,32 +16,26 @@ package reader -import ( - "context" - - "github.com/zilliztech/milvus-cdc/core/model" - . "github.com/zilliztech/milvus-cdc/core/util" -) - -//go:generate mockery --name=CDCReader --filename=cdc_reader_mock.go --output=../mocks -type CDCReader interface { - CDCMark - - StartRead(ctx context.Context) <-chan *model.CDCData - QuitRead(ctx context.Context) -} - -// DefaultReader All CDCReader implements should combine it -type DefaultReader struct { - CDCMark -} - -// StartRead the return value is nil, -// and if you receive the data from the nil chan, will block forever, not panic -func (d *DefaultReader) StartRead(ctx context.Context) <-chan *model.CDCData { - log.Warn("StartRead is not implemented, please check it") - return nil -} - -func (d *DefaultReader) QuitRead(ctx context.Context) { -} +// import ( +// "context" +// +// "github.com/zilliztech/milvus-cdc/core/model" +// ) +// +// type CDCReader interface { +// StartRead(ctx context.Context) <-chan *model.CDCData +// QuitRead(ctx context.Context) +// } +// +// // DefaultReader All CDCReader implements should combine it +// type DefaultReader struct{} +// +// // StartRead the return value is nil, +// // and if you receive the data from the nil chan, will block forever, not panic +// func (d *DefaultReader) StartRead(ctx context.Context) <-chan *model.CDCData { +// log.Warn("StartRead is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultReader) QuitRead(ctx context.Context) { +// } diff --git a/core/reader/replicate_channel_manager.go b/core/reader/replicate_channel_manager.go new file mode 100644 index 00000000..6f9a6563 --- /dev/null +++ b/core/reader/replicate_channel_manager.go @@ -0,0 +1,414 @@ +package reader + +import ( + "context" + "strings" + "sync" + "time" + + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/mq/msgstream" + "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" + "github.com/milvus-io/milvus/pkg/util/commonpbutil" + "github.com/milvus-io/milvus/pkg/util/retry" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" + 
"github.com/zilliztech/milvus-cdc/core/config" + "github.com/zilliztech/milvus-cdc/core/model" + "github.com/zilliztech/milvus-cdc/core/pb" +) + +var _ api.ChannelManager = (*replicateChannelManager)(nil) + +type replicateChannelManager struct { + factory msgstream.Factory + apiEventChan chan *api.ReplicateAPIEvent + targetClient api.TargetAPI + messageBufferSize int + + channelLock sync.RWMutex + channelHandlerMap map[string]*replicateChannelHandler + + collectionLock sync.Mutex + replicateCollections map[int64]chan struct{} + channelChan chan string +} + +func NewReplicateChannelManager(mqConfig config.MQConfig, client api.TargetAPI, messageBufferSize int) (api.ChannelManager, error) { + factoryCreator := NewDefaultFactoryCreator() + var factory msgstream.Factory + if mqConfig.Pulsar.Address != "" { + factory = factoryCreator.NewPmsFactory(&mqConfig.Pulsar) + } else if mqConfig.Kafka.Address != "" { + factory = factoryCreator.NewKmsFactory(&mqConfig.Kafka) + } else { + log.Warn("mqConfig is empty") + return nil, errors.New("fail to get the msg stream, check the mqConfig param") + } + + return &replicateChannelManager{ + factory: factory, + apiEventChan: make(chan *api.ReplicateAPIEvent, 10), + targetClient: client, + messageBufferSize: messageBufferSize, + channelHandlerMap: make(map[string]*replicateChannelHandler), + replicateCollections: make(map[int64]chan struct{}), + channelChan: make(chan string, 10), + }, nil +} + +func (r *replicateChannelManager) StartReadCollection(ctx context.Context, info *pb.CollectionInfo, seekPositions []*msgpb.MsgPosition) error { + if _, err := r.targetClient.GetCollectionInfo(ctx, info.Schema.GetName()); err != nil { + select { + case r.apiEventChan <- &api.ReplicateAPIEvent{ + EventType: api.ReplicateCreateCollection, + CollectionInfo: info, + }: + case <-ctx.Done(): + log.Warn("context is done in the start read collection") + return ctx.Err() + } + } + + var targetInfo *model.CollectionInfo + var err error + + err = retry.Do(ctx, func() error { + targetInfo, err = r.targetClient.GetCollectionInfo(ctx, info.Schema.Name) + return err + }, retry.Sleep(time.Second), retry.MaxSleepTime(10*time.Second), retry.Attempts(5)) + if err != nil { + log.Warn("failed to get target collection info", zap.Error(err)) + return err + } + + for i, channel := range targetInfo.PChannels { + if !strings.Contains(targetInfo.VChannels[i], channel) { + log.Warn("physical channel not equal", zap.Strings("p", targetInfo.PChannels), zap.Strings("v", targetInfo.VChannels)) + return errors.New("the physical channels are not matched to the virtual channels") + } + } + + getSeekPosition := func(channelName string) *msgpb.MsgPosition { + for _, seekPosition := range seekPositions { + if seekPosition.ChannelName == channelName { + return seekPosition + } + } + return nil + } + + var barrierChan chan struct{} + r.collectionLock.Lock() + if _, ok := r.replicateCollections[info.ID]; ok { + r.collectionLock.Unlock() + return nil + } + barrierChan = make(chan struct{}, len(info.StartPositions)) + closeChan := make(chan struct{}) + r.replicateCollections[info.ID] = closeChan + go func(dest int) { + current := 0 + for current < dest { + select { + case <-closeChan: + return + case <-barrierChan: + current++ + } + } + select { + case <-closeChan: + case r.apiEventChan <- &api.ReplicateAPIEvent{ + EventType: api.ReplicateDropCollection, + CollectionInfo: info, + }: + } + }(len(info.StartPositions)) + r.collectionLock.Unlock() + + var successChannels []string + for i, position := range 
info.StartPositions { + channelName := position.GetKey() + err := r.startReadChannel(&model.SourceCollectionInfo{ + PChannelName: channelName, + CollectionID: info.ID, + SeekPosition: getSeekPosition(channelName), + }, &model.TargetCollectionInfo{ + CollectionID: targetInfo.CollectionID, + PartitionInfo: targetInfo.Partitions, + PChannel: targetInfo.PChannels[i], + VChannel: targetInfo.VChannels[i], + BarrierChan: barrierChan, + }) + if err != nil { + log.Warn("start read channel failed", zap.String("channel", channelName), zap.Int64("collection_id", info.ID), zap.Error(err)) + for _, channel := range successChannels { + r.stopReadChannel(channel, info.ID) + } + return err + } + successChannels = append(successChannels, channelName) + log.Info("start read channel", zap.String("channel", channelName)) + } + return err +} + +func (r *replicateChannelManager) StopReadCollection(ctx context.Context, info *pb.CollectionInfo) error { + for _, position := range info.StartPositions { + r.stopReadChannel(position.GetKey(), info.ID) + } + r.collectionLock.Lock() + defer r.collectionLock.Unlock() + closeChan, ok := r.replicateCollections[info.ID] + if ok { + close(closeChan) + delete(r.replicateCollections, info.ID) + } + return nil +} + +func (r *replicateChannelManager) GetChannelChan() <-chan string { + return r.channelChan +} + +func (r *replicateChannelManager) GetMsgChan(pChannel string) <-chan *msgstream.MsgPack { + r.channelLock.RLock() + defer r.channelLock.RUnlock() + + handler := r.channelHandlerMap[pChannel] + if handler != nil { + return handler.msgPackChan + } + return nil +} + +func (r *replicateChannelManager) GetEventChan() <-chan *api.ReplicateAPIEvent { + return r.apiEventChan +} + +// startReadChannel start read channel +// pChannelName: source milvus channel name, collectionID: source milvus collection id, startPosition: start position of the source milvus collection +// targetInfo: target collection info, it will be used to replace the message info in the source milvus channel +func (r *replicateChannelManager) startReadChannel(sourceInfo *model.SourceCollectionInfo, targetInfo *model.TargetCollectionInfo) error { + r.channelLock.Lock() + defer r.channelLock.Unlock() + var err error + + channelHandler, ok := r.channelHandlerMap[sourceInfo.PChannelName] + if !ok { + channelHandler, err = newReplicateChannelHandler(sourceInfo, targetInfo, &model.HandlerOpts{ + MessageBufferSize: r.messageBufferSize, + Factory: r.factory, + }) + if err != nil { + log.Warn("fail to new replicate channel handler", + zap.String("channel_name", sourceInfo.PChannelName), zap.Int64("collection_id", sourceInfo.CollectionID), zap.Error(err)) + return err + } + r.channelHandlerMap[sourceInfo.PChannelName] = channelHandler + r.channelChan <- sourceInfo.PChannelName + return nil + } + channelHandler.AddCollection(sourceInfo.CollectionID, targetInfo) + return nil +} + +func (r *replicateChannelManager) stopReadChannel(pChannelName string, collectionID int64) { + r.channelLock.Lock() + defer r.channelLock.Unlock() + channelHandler, ok := r.channelHandlerMap[pChannelName] + if !ok { + return + } + channelHandler.RemoveCollection(collectionID) + if channelHandler.IsEmpty() { + channelHandler.Close() + } +} + +type replicateChannelHandler struct { + pChannelName string + targetPChannel string + stream msgstream.MsgStream + // key: source milvus collectionID value: *model.TargetCollectionInfo + collectionRecords *sync.Map + msgPackChan chan *msgstream.MsgPack +} + +func (r *replicateChannelHandler) 
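A hypothetical consumer of the new manager, assuming the api.ChannelManager interface exposes the GetChannelChan and GetMsgChan methods implemented here: each discovered pchannel gets its own goroutine that drains the replicated msg packs (event handling via GetEventChan and shutdown are omitted in this sketch):

package example

import (
	"context"
	"fmt"

	"github.com/zilliztech/milvus-cdc/core/api"
)

// consumeChannels drains every pchannel the manager discovers; GetMsgChan
// returns the handler's buffered msg pack channel, which is closed when the
// underlying stream channel closes.
func consumeChannels(ctx context.Context, manager api.ChannelManager) {
	for {
		select {
		case <-ctx.Done():
			return
		case pChannel := <-manager.GetChannelChan():
			go func(name string) {
				for pack := range manager.GetMsgChan(name) {
					fmt.Println("pack from", name, "msgs:", len(pack.Msgs))
				}
			}(pChannel)
		}
	}
}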
AddCollection(collectionID int64, targetInfo *model.TargetCollectionInfo) { + r.collectionRecords.Store(collectionID, targetInfo) +} + +func (r *replicateChannelHandler) RemoveCollection(collectionID int64) { + r.collectionRecords.Delete(collectionID) +} + +func (r *replicateChannelHandler) IsEmpty() bool { + isEmpty := true + r.collectionRecords.Range(func(key, value interface{}) bool { + isEmpty = false + return false + }) + return isEmpty +} + +func (r *replicateChannelHandler) Chan() chan<- *msgstream.MsgPack { + return r.msgPackChan +} + +func (r *replicateChannelHandler) Close() { + r.stream.Close() +} + +func (r *replicateChannelHandler) startReadChannel() { + go func() { + for { + msgPack, ok := <-r.stream.Chan() + if !ok { + close(r.msgPackChan) + return + } + r.msgPackChan <- r.handlePack(msgPack) + } + }() +} + +func (r *replicateChannelHandler) handlePack(pack *msgstream.MsgPack) *msgstream.MsgPack { + newPack := &msgstream.MsgPack{ + BeginTs: pack.BeginTs, + EndTs: pack.EndTs, + StartPositions: pack.StartPositions, + EndPositions: pack.EndPositions, + Msgs: make([]msgstream.TsMsg, 0), + } + needTsMsg := false + pChannel := r.targetPChannel + for _, msg := range pack.Msgs { + if msg.Type() == commonpb.MsgType_CreateCollection || + msg.Type() == commonpb.MsgType_CreatePartition { + continue + } + if y, ok := msg.(interface{ GetCollectionID() int64 }); ok { + targetInfo, ok := r.collectionRecords.Load(y.GetCollectionID()) + i := 0 + for !ok && i < 10 { + log.Warn("filter msg in replicate channel handler", + zap.Any("current_collections", r.collectionRecords), + zap.Int64("msg_collection_id", y.GetCollectionID()), + zap.Any("msg_type", msg.Type())) + time.Sleep(500 * time.Millisecond) + // TODO it needs to be considered when supporting the specific collection in a task + // TODO maybe wait too long time? 
+ targetInfo, ok = r.collectionRecords.Load(y.GetCollectionID()) + i++ + } + if !ok && i == 10 { + log.Warn("filter msg in replicate channel handler", zap.Int64("msg_collection_id", y.GetCollectionID()), zap.Any("msg_type", msg.Type())) + continue + } + info := targetInfo.(*model.TargetCollectionInfo) + switch realMsg := msg.(type) { + case *msgstream.InsertMsg: + realMsg.CollectionID = info.CollectionID + realMsg.PartitionID = info.PartitionInfo[realMsg.PartitionName] + realMsg.ShardName = info.VChannel + case *msgstream.DeleteMsg: + realMsg.CollectionID = info.CollectionID + if realMsg.PartitionName != "" { + realMsg.PartitionID = info.PartitionInfo[realMsg.PartitionName] + } + realMsg.ShardName = info.VChannel + case *msgstream.DropCollectionMsg: + realMsg.CollectionID = info.CollectionID + info.BarrierChan <- struct{}{} + needTsMsg = true + case *msgstream.DropPartitionMsg: + realMsg.CollectionID = info.CollectionID + if realMsg.PartitionName != "" { + realMsg.PartitionID = info.PartitionInfo[realMsg.PartitionName] + } + // TODO barrier partition + } + originPosition := msg.Position() + msg.SetPosition(&msgpb.MsgPosition{ + ChannelName: info.PChannel, + MsgID: originPosition.GetMsgID(), + MsgGroup: originPosition.GetMsgGroup(), + Timestamp: originPosition.GetTimestamp(), + }) + if pChannel != info.PChannel { + log.Panic("pChannel not equal", zap.String("pChannel", pChannel), zap.String("info_pChannel", info.PChannel)) + } + } + newPack.Msgs = append(newPack.Msgs, msg) + } + for _, position := range newPack.StartPositions { + position.ChannelName = pChannel + } + for _, position := range newPack.EndPositions { + position.ChannelName = pChannel + } + if len(newPack.Msgs) != 0 { + log.Info("receive msg pack", zap.Any("msg_pack", newPack)) + } + needTsMsg = needTsMsg || len(newPack.Msgs) == 0 + if needTsMsg { + timeTickResult := msgpb.TimeTickMsg{ + Base: commonpbutil.NewMsgBase( + commonpbutil.WithMsgType(commonpb.MsgType_TimeTick), + commonpbutil.WithMsgID(0), + commonpbutil.WithTimeStamp(pack.EndTs), + commonpbutil.WithSourceID(-1), + ), + } + timeTickMsg := &msgstream.TimeTickMsg{ + BaseMsg: msgstream.BaseMsg{ + BeginTimestamp: pack.EndTs, + EndTimestamp: pack.EndTs, + HashValues: []uint32{0}, + }, + TimeTickMsg: timeTickResult, + } + newPack.Msgs = append(newPack.Msgs, timeTickMsg) + } + return newPack +} + +func newReplicateChannelHandler(sourceInfo *model.SourceCollectionInfo, targetInfo *model.TargetCollectionInfo, opts *model.HandlerOpts) (*replicateChannelHandler, error) { + ctx := context.Background() + stream, err := opts.Factory.NewTtMsgStream(ctx) + log := log.With(zap.String("channel_name", sourceInfo.PChannelName), zap.Int64("collection_id", sourceInfo.CollectionID)) + if err != nil { + log.Warn("fail to new the msg stream", zap.Error(err)) + return nil, err + } + err = stream.AsConsumer(ctx, []string{sourceInfo.PChannelName}, sourceInfo.PChannelName, mqwrapper.SubscriptionPositionLatest) + if err != nil { + log.Warn("fail to consume the channel", zap.Error(err)) + stream.Close() + return nil, err + } + if sourceInfo.SeekPosition != nil { + err = stream.Seek(ctx, []*msgstream.MsgPosition{sourceInfo.SeekPosition}) + if err != nil { + log.Warn("fail to seek the msg stream", zap.Error(err)) + stream.Close() + return nil, err + } + } + channelHandler := &replicateChannelHandler{ + pChannelName: sourceInfo.PChannelName, + targetPChannel: targetInfo.PChannel, + collectionRecords: &sync.Map{}, + stream: stream, + msgPackChan: make(chan *msgstream.MsgPack, opts.MessageBufferSize), 
+ } + channelHandler.AddCollection(sourceInfo.CollectionID, targetInfo) + channelHandler.startReadChannel() + return channelHandler, nil +} diff --git a/core/reader/target_client.go b/core/reader/target_client.go new file mode 100644 index 00000000..03bddd49 --- /dev/null +++ b/core/reader/target_client.go @@ -0,0 +1,67 @@ +package reader + +import ( + "context" + + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-sdk-go/v2/client" + "github.com/milvus-io/milvus/pkg/log" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" + "github.com/zilliztech/milvus-cdc/core/model" +) + +var _ api.TargetAPI = (*TargetClient)(nil) + +type TargetClient struct { + client client.Client +} + +type TargetConfig struct { + Address string + Username string + Password string + EnableTLS bool +} + +func NewTarget(ctx context.Context, config TargetConfig) (*TargetClient, error) { + targetClient := &TargetClient{} + var err error + targetClient.client, err = client.NewClient(ctx, client.Config{ + Address: config.Address, + Username: config.Username, + Password: config.Password, + EnableTLSAuth: config.EnableTLS, + }) + if err != nil { + log.Warn("fail to new target client", zap.String("address", config.Address), zap.Error(err)) + return nil, err + } + return targetClient, nil +} + +func (t *TargetClient) GetCollectionInfo(ctx context.Context, collectionName string) (*model.CollectionInfo, error) { + collectionInfo := &model.CollectionInfo{} + collection, err := t.client.DescribeCollection(ctx, collectionName) + if err != nil { + log.Warn("fail to describe collection", zap.Error(err)) + return nil, err + } + collectionInfo.CollectionID = collection.ID + collectionInfo.CollectionName = collectionName + collectionInfo.PChannels = collection.PhysicalChannels + collectionInfo.VChannels = collection.VirtualChannels + + partition, err := t.client.ShowPartitions(ctx, collectionName) + if err != nil || len(partition) == 0 { + log.Warn("failed to show partitions", zap.Error(err)) + return nil, errors.New("fail to show the partitions") + } + partitionInfo := make(map[string]int64, len(partition)) + for _, e := range partition { + partitionInfo[e.Name] = e.ID + } + collectionInfo.Partitions = partitionInfo + return collectionInfo, nil +} diff --git a/core/util/etcd.go b/core/util/etcd.go index f97fa523..6678df70 100644 --- a/core/util/etcd.go +++ b/core/util/etcd.go @@ -21,11 +21,14 @@ import ( "path" "time" + "github.com/milvus-io/milvus/pkg/log" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" ) var ( + // TODO config + EtcdOpTimeout = 10 * time.Second EtcdOpRetryTime uint = 5 ) @@ -56,9 +59,9 @@ func GetEtcdClient(endpoints []string) (KVApi, error) { etcdCli, err := newEtcdClient(clientv3.Config{ Endpoints: endpoints, DialTimeout: 5 * time.Second, - Logger: Log, + Logger: log.L(), }) - errLog := Log.With(zap.Strings("endpoints", endpoints), zap.Error(err)) + errLog := log.With(zap.Strings("endpoints", endpoints), zap.Error(err)) if err != nil { errLog.Warn("fail to etcd client") return nil, err @@ -92,14 +95,12 @@ func EtcdPut(etcdCli KVApi, key, val string, opts ...clientv3.OpOption) error { }, Attempts(EtcdOpRetryTime)) } -func EtcdGet(etcdCli KVApi, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) { - ctx, cancel := context.WithTimeout(context.Background(), EtcdOpTimeout) - defer cancel() - var ( - resp *clientv3.GetResponse - err error - ) +func EtcdGetWithContext(ctx context.Context, etcdCli KVApi, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, 
error) { + var err error + var resp *clientv3.GetResponse + ctx, cancel := context.WithTimeout(ctx, EtcdOpTimeout) + defer cancel() err = Do(ctx, func() error { resp, err = etcdCli.Get(ctx, key, opts...) return err @@ -107,6 +108,11 @@ func EtcdGet(etcdCli KVApi, key string, opts ...clientv3.OpOption) (*clientv3.Ge return resp, err } +// Deprecated: use EtcdGetWithContext instead +func EtcdGet(etcdCli KVApi, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) { + return EtcdGetWithContext(context.TODO(), etcdCli, key, opts...) +} + func EtcdDelete(etcdCli KVApi, key string, opts ...clientv3.OpOption) error { ctx, cancel := context.WithTimeout(context.Background(), EtcdOpTimeout) defer cancel() diff --git a/core/util/log.go b/core/util/log.go index dc936a2f..e06c79d4 100644 --- a/core/util/log.go +++ b/core/util/log.go @@ -18,18 +18,27 @@ package util import ( "github.com/milvus-io/milvus/pkg/log" - "go.uber.org/zap" ) -var ( - Log *zap.Logger -) +// var ( +// Log *zap.Logger +// ) func init() { - conf := &log.Config{Level: "info", Stdout: true, File: log.FileLogConfig{ - RootPath: "/tmp/cdc_log", - Filename: "cdc.log", - }} - Log, _, _ = log.InitLogger(conf) - Log = Log.WithOptions(zap.AddCallerSkip(-1)) + conf := &log.Config{ + Level: "info", + Stdout: true, + File: log.FileLogConfig{ + RootPath: "/tmp/cdc_log", + Filename: "cdc.log", + }, + } + + // TODO delete it + // Log, _, _ = log.InitLogger(conf) + // Log = Log.WithOptions(zap.AddCallerSkip(-1)) + + // l, p, _ := log.InitLogger(conf, zap.AddCallerSkip(1)) + l, p, _ := log.InitLogger(conf) + log.ReplaceGlobals(l, p) } diff --git a/core/util/msg.go b/core/util/msg.go index 4eafa242..b31d25af 100644 --- a/core/util/msg.go +++ b/core/util/msg.go @@ -21,10 +21,12 @@ import ( "fmt" "reflect" + "go.uber.org/zap" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" - "go.uber.org/zap" ) var ( @@ -201,7 +203,7 @@ func AppendFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData, idx i dstScalar.GetStringData().Data = append(dstScalar.GetStringData().Data, srcScalar.StringData.Data[idx]) } default: - Log.Error("Not supported field type", zap.String("field type", fieldData.Type.String())) + log.Error("Not supported field type", zap.String("field type", fieldData.Type.String())) } case *schemapb.FieldData_Vectors: dim := fieldType.Vectors.Dim @@ -243,7 +245,7 @@ func AppendFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData, idx i dstVector.GetFloatVector().Data = append(dstVector.GetFloatVector().Data, srcVector.FloatVector.Data[idx*dim:(idx+1)*dim]...) 
} default: - Log.Error("Not supported field type", zap.String("field type", fieldData.Type.String())) + log.Error("Not supported field type", zap.String("field type", fieldData.Type.String())) } } } @@ -255,7 +257,7 @@ func DeleteFieldData(dst []*schemapb.FieldData) { switch fieldType := fieldData.Field.(type) { case *schemapb.FieldData_Scalars: if dst[i] == nil || dst[i].GetScalars() == nil { - Log.Info("empty field data can't be deleted") + log.Info("empty field data can't be deleted") return } dstScalar := dst[i].GetScalars() @@ -273,11 +275,11 @@ func DeleteFieldData(dst []*schemapb.FieldData) { case *schemapb.ScalarField_StringData: dstScalar.GetStringData().Data = dstScalar.GetStringData().Data[:len(dstScalar.GetStringData().Data)-1] default: - Log.Error("wrong field type added", zap.String("field type", fieldData.Type.String())) + log.Error("wrong field type added", zap.String("field type", fieldData.Type.String())) } case *schemapb.FieldData_Vectors: if dst[i] == nil || dst[i].GetVectors() == nil { - Log.Info("empty field data can't be deleted") + log.Info("empty field data can't be deleted") return } dim := fieldType.Vectors.Dim @@ -289,7 +291,7 @@ func DeleteFieldData(dst []*schemapb.FieldData) { case *schemapb.VectorField_FloatVector: dstVector.GetFloatVector().Data = dstVector.GetFloatVector().Data[:len(dstVector.GetFloatVector().Data)-int(dim)] default: - Log.Error("wrong field type added", zap.String("field type", fieldData.Type.String())) + log.Error("wrong field type added", zap.String("field type", fieldData.Type.String())) } } } diff --git a/core/util/retry.go b/core/util/retry.go index 0a92bca3..d2e9473c 100644 --- a/core/util/retry.go +++ b/core/util/retry.go @@ -20,6 +20,7 @@ import ( "context" "time" + "github.com/milvus-io/milvus/pkg/log" "go.uber.org/zap" ) @@ -85,7 +86,7 @@ func Do(ctx context.Context, fn func() error, opts ...Option) error { for i := uint(0); i < c.attempts; i++ { if err := fn(); err != nil { if i%10 == 0 { - Log.Debug("retry func failed", zap.Uint("retry time", i), zap.Error(err)) + log.Debug("retry func failed", zap.Uint("retry time", i), zap.Error(err)) } el = append(el, err) diff --git a/core/util/string.go b/core/util/string.go index eefff61d..23c06f66 100644 --- a/core/util/string.go +++ b/core/util/string.go @@ -22,6 +22,7 @@ import ( "strings" "unsafe" + "github.com/milvus-io/milvus/pkg/log" "go.uber.org/zap" ) @@ -47,7 +48,7 @@ func ToPhysicalChannel(vchannel string) string { func Base64Encode(obj any) string { objByte, err := json.Marshal(obj) if err != nil { - Log.Warn("fail to marshal obj", zap.Any("obj", obj)) + log.Warn("fail to marshal obj", zap.Any("obj", obj)) return "" } return base64.StdEncoding.EncodeToString(objByte) diff --git a/core/writer/channel_writer.go b/core/writer/channel_writer.go new file mode 100644 index 00000000..71eeddb0 --- /dev/null +++ b/core/writer/channel_writer.go @@ -0,0 +1,105 @@ +package writer + +import ( + "context" + + "github.com/cockroachdb/errors" + "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus-sdk-go/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/mq/msgstream" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" +) + +var _ api.Writer = (*ChannelWriter)(nil) + +type ChannelWriter struct { + dataHandler api.DataHandler + messageManager api.MessageManager +} + +func NewChannelWriter(dataHandler api.DataHandler, messageBufferSize int) api.Writer { + return &ChannelWriter{ + dataHandler: dataHandler, + 
messageManager: NewReplicateMessageManager(dataHandler, messageBufferSize),
+ }
+}
+
+func (c *ChannelWriter) HandleReplicateAPIEvent(ctx context.Context, apiEvent *api.ReplicateAPIEvent) error {
+ switch apiEvent.EventType {
+ case api.ReplicateCreateCollection:
+ collectionInfo := apiEvent.CollectionInfo
+ entitySchema := &entity.Schema{}
+ entitySchema = entitySchema.ReadProto(collectionInfo.Schema)
+ createParam := &api.CreateCollectionParam{
+ Schema: entitySchema,
+ ShardsNum: collectionInfo.ShardsNum,
+ ConsistencyLevel: collectionInfo.ConsistencyLevel,
+ Properties: collectionInfo.Properties,
+ }
+ err := c.dataHandler.CreateCollection(ctx, createParam)
+ if err != nil {
+ log.Warn("fail to create collection", zap.String("name", createParam.Schema.CollectionName), zap.Error(err))
+ }
+ return err
+ case api.ReplicateDropCollection:
+ dropParam := &api.DropCollectionParam{
+ CollectionName: apiEvent.CollectionInfo.Schema.GetName(),
+ }
+ err := c.dataHandler.DropCollection(ctx, dropParam)
+ if err != nil {
+ log.Warn("fail to drop collection", zap.String("name", dropParam.CollectionName), zap.Error(err))
+ }
+ return err
+ default:
+ log.Warn("unknown replicate api event", zap.Any("event", apiEvent))
+ }
+ return nil
+}
+
+func (c *ChannelWriter) HandleReplicateMessage(ctx context.Context, channelName string, msgPack *msgstream.MsgPack) (*commonpb.KeyDataPair, error) {
+ if len(msgPack.Msgs) == 0 {
+ log.Warn("receive empty message pack", zap.String("channel", channelName))
+ return nil, errors.New("receive empty message pack")
+ }
+ msgBytesArr := make([][]byte, 0)
+ for _, msg := range msgPack.Msgs {
+ msgBytes, err := msg.Marshal(msg)
+ if err != nil {
+ log.Warn("failed to marshal msg", zap.Error(err))
+ return nil, err
+ }
+ if _, ok := msgBytes.([]byte); !ok {
+ log.Warn("failed to convert msg bytes to []byte")
+ return nil, errors.New("failed to convert msg bytes to []byte")
+ }
+ msgBytesArr = append(msgBytesArr, msgBytes.([]byte))
+ }
+ replicateMessageParam := &api.ReplicateMessageParam{
+ ChannelName: channelName,
+ StartPositions: msgPack.StartPositions,
+ EndPositions: msgPack.EndPositions,
+ BeginTs: msgPack.BeginTs,
+ EndTs: msgPack.EndTs,
+ MsgsBytes: msgBytesArr,
+ }
+ errChan := make(chan error, 1)
+ message := &api.ReplicateMessage{
+ Param: replicateMessageParam,
+ SuccessFunc: func(param *api.ReplicateMessageParam) {
+ errChan <- nil
+ },
+ FailFunc: func(param *api.ReplicateMessageParam, err error) {
+ errChan <- err
+ },
+ }
+ c.messageManager.ReplicateMessage(message)
+ endPosition := msgPack.EndPositions[len(msgPack.EndPositions)-1]
+ position := &commonpb.KeyDataPair{
+ Key: endPosition.ChannelName,
+ Data: endPosition.MsgID,
+ }
+ return position, <-errChan
+}
diff --git a/core/writer/config_option.go b/core/writer/config_option.go
index 0b2b258b..341be770 100644
--- a/core/writer/config_option.go
+++ b/core/writer/config_option.go
@@ -17,8 +17,6 @@
 package writer
 
 import (
- "time"
-
 "github.com/zilliztech/milvus-cdc/core/config"
 )
 
@@ -55,40 +53,40 @@ func IgnorePartitionOption(ignore bool) config.Option[*MilvusDataHandler] {
 })
 }
 
-func MilvusFactoryOption(f MilvusClientFactory) config.Option[*MilvusDataHandler] {
- return config.OptionFunc[*MilvusDataHandler](func(object *MilvusDataHandler) {
- if f != nil {
- object.factory = f
- }
- })
-}
-
-func HandlerOption(handler CDCDataHandler) config.Option[*CDCWriterTemplate] {
- return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) {
- object.handler = handler
- })
-}
-
-func NoBufferOption() config.Option[*CDCWriterTemplate] {
-
return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { - object.bufferConfig = NoBufferConfig - }) -} - -func BufferOption(period time.Duration, size int64, positionFunc NotifyCollectionPositionChangeFunc) config.Option[*CDCWriterTemplate] { - return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { - if period > 0 { - object.bufferConfig.Period = period - } - if size > 0 { - object.bufferConfig.Size = size - } - object.bufferUpdatePositionFunc = positionFunc - }) -} - -func ErrorProtectOption(per int32, unit time.Duration) config.Option[*CDCWriterTemplate] { - return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { - object.errProtect = NewErrorProtect(per, unit) - }) -} +// func MilvusFactoryOption(f MilvusClientFactory) config.Option[*MilvusDataHandler] { +// return config.OptionFunc[*MilvusDataHandler](func(object *MilvusDataHandler) { +// if f != nil { +// object.factory = f +// } +// }) +// } +// +// func HandlerOption(handler CDCDataHandler) config.Option[*CDCWriterTemplate] { +// return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { +// object.handler = handler +// }) +// } +// +// func NoBufferOption() config.Option[*CDCWriterTemplate] { +// return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { +// object.bufferConfig = NoBufferConfig +// }) +// } +// +// func BufferOption(period time.Duration, size int64, positionFunc NotifyCollectionPositionChangeFunc) config.Option[*CDCWriterTemplate] { +// return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { +// if period > 0 { +// object.bufferConfig.Period = period +// } +// if size > 0 { +// object.bufferConfig.Size = size +// } +// object.bufferUpdatePositionFunc = positionFunc +// }) +// } +// +// func ErrorProtectOption(per int32, unit time.Duration) config.Option[*CDCWriterTemplate] { +// return config.OptionFunc[*CDCWriterTemplate](func(object *CDCWriterTemplate) { +// object.errProtect = NewErrorProtect(per, unit) +// }) +// } diff --git a/core/writer/handler.go b/core/writer/handler.go index 36e79f68..02c79791 100644 --- a/core/writer/handler.go +++ b/core/writer/handler.go @@ -16,154 +16,161 @@ package writer -import ( - "context" - - "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" - - "github.com/zilliztech/milvus-cdc/core/util" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-sdk-go/v2/entity" -) - -type CDCDataHandler interface { - util.CDCMark - - CreateCollection(ctx context.Context, param *CreateCollectionParam) error - DropCollection(ctx context.Context, param *DropCollectionParam) error - Insert(ctx context.Context, param *InsertParam) error - Delete(ctx context.Context, param *DeleteParam) error - CreatePartition(ctx context.Context, param *CreatePartitionParam) error - DropPartition(ctx context.Context, param *DropPartitionParam) error - - CreateIndex(ctx context.Context, param *CreateIndexParam) error - DropIndex(ctx context.Context, param *DropIndexParam) error - LoadCollection(ctx context.Context, param *LoadCollectionParam) error - ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error - CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error - DropDatabase(ctx context.Context, param *DropDataBaseParam) error - - // NOTE: please add the implements for the DataHandlerWrapper class when adding new interfaces -} - -type DefaultDataHandler struct { - util.CDCMark -} - -func (d *DefaultDataHandler) 
CreateCollection(ctx context.Context, param *CreateCollectionParam) error { - log.Warn("CreateCollection is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) DropCollection(ctx context.Context, param *DropCollectionParam) error { - log.Warn("DropCollection is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) Insert(ctx context.Context, param *InsertParam) error { - log.Warn("Insert is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) Delete(ctx context.Context, param *DeleteParam) error { - log.Warn("Delete is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) CreatePartition(ctx context.Context, param *CreatePartitionParam) error { - log.Warn("CreatePartition is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) DropPartition(ctx context.Context, param *DropPartitionParam) error { - log.Warn("DropPartition is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) CreateIndex(ctx context.Context, param *CreateIndexParam) error { - log.Warn("CreateIndex is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) DropIndex(ctx context.Context, param *DropIndexParam) error { - log.Warn("DropIndex is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) LoadCollection(ctx context.Context, param *LoadCollectionParam) error { - log.Warn("LoadCollection is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error { - log.Warn("ReleaseCollection is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error { - log.Warn("CreateDatabase is not implemented, please check it") - return nil -} - -func (d *DefaultDataHandler) DropDatabase(ctx context.Context, param *DropDataBaseParam) error { - log.Warn("DropDatabase is not implemented, please check it") - return nil -} - -type CreateCollectionParam struct { - Schema *entity.Schema - ShardsNum int32 - ConsistencyLevel commonpb.ConsistencyLevel - Properties []*commonpb.KeyValuePair -} - -type DropCollectionParam struct { - CollectionName string -} - -type InsertParam struct { - CollectionName string - PartitionName string - Columns []entity.Column -} - -type DeleteParam struct { - CollectionName string - PartitionName string - Column entity.Column -} - -type CreatePartitionParam struct { - CollectionName string - PartitionName string -} - -type DropPartitionParam struct { - CollectionName string - PartitionName string -} - -type CreateIndexParam struct { - milvuspb.CreateIndexRequest -} - -type DropIndexParam struct { - milvuspb.DropIndexRequest -} - -type LoadCollectionParam struct { - milvuspb.LoadCollectionRequest -} - -type ReleaseCollectionParam struct { - milvuspb.ReleaseCollectionRequest -} - -type CreateDataBaseParam struct { - milvuspb.CreateDatabaseRequest -} - -type DropDataBaseParam struct { - milvuspb.DropDatabaseRequest -} +// import ( +// "context" +// +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" +// "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" +// "github.com/milvus-io/milvus-sdk-go/v2/entity" +// ) +// +// type CDCDataHandler interface { +// CreateCollection(ctx context.Context, param *CreateCollectionParam) error +// 
DropCollection(ctx context.Context, param *DropCollectionParam) error +// Insert(ctx context.Context, param *InsertParam) error +// Delete(ctx context.Context, param *DeleteParam) error +// CreatePartition(ctx context.Context, param *CreatePartitionParam) error +// DropPartition(ctx context.Context, param *DropPartitionParam) error +// +// CreateIndex(ctx context.Context, param *CreateIndexParam) error +// DropIndex(ctx context.Context, param *DropIndexParam) error +// LoadCollection(ctx context.Context, param *LoadCollectionParam) error +// ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error +// CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error +// DropDatabase(ctx context.Context, param *DropDataBaseParam) error +// +// ReplicateMessage(ctx context.Context, param *ReplicateMessageParam) error +// // NOTE: please add the implements for the DataHandlerWrapper class when adding new interfaces +// } +// +// type DefaultDataHandler struct{} +// +// func (d *DefaultDataHandler) CreateCollection(ctx context.Context, param *CreateCollectionParam) error { +// log.Warn("CreateCollection is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) DropCollection(ctx context.Context, param *DropCollectionParam) error { +// log.Warn("DropCollection is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) Insert(ctx context.Context, param *InsertParam) error { +// log.Warn("Insert is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) Delete(ctx context.Context, param *DeleteParam) error { +// log.Warn("Delete is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) CreatePartition(ctx context.Context, param *CreatePartitionParam) error { +// log.Warn("CreatePartition is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) DropPartition(ctx context.Context, param *DropPartitionParam) error { +// log.Warn("DropPartition is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) CreateIndex(ctx context.Context, param *CreateIndexParam) error { +// log.Warn("CreateIndex is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) DropIndex(ctx context.Context, param *DropIndexParam) error { +// log.Warn("DropIndex is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) LoadCollection(ctx context.Context, param *LoadCollectionParam) error { +// log.Warn("LoadCollection is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error { +// log.Warn("ReleaseCollection is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error { +// log.Warn("CreateDatabase is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) DropDatabase(ctx context.Context, param *DropDataBaseParam) error { +// log.Warn("DropDatabase is not implemented, please check it") +// return nil +// } +// +// func (d *DefaultDataHandler) ReplicateMessage(ctx context.Context, param *ReplicateMessageParam) error { +// log.Warn("Replicate is not implemented, please check it") +// return nil +// } +// +// type CreateCollectionParam struct { +// Schema 
*entity.Schema +// ShardsNum int32 +// ConsistencyLevel commonpb.ConsistencyLevel +// Properties []*commonpb.KeyValuePair +// } +// +// type DropCollectionParam struct { +// CollectionName string +// } +// +// type InsertParam struct { +// CollectionName string +// PartitionName string +// Columns []entity.Column +// } +// +// type DeleteParam struct { +// CollectionName string +// PartitionName string +// Column entity.Column +// } +// +// type CreatePartitionParam struct { +// CollectionName string +// PartitionName string +// } +// +// type DropPartitionParam struct { +// CollectionName string +// PartitionName string +// } +// +// type CreateIndexParam struct { +// milvuspb.CreateIndexRequest +// } +// +// type DropIndexParam struct { +// milvuspb.DropIndexRequest +// } +// +// type LoadCollectionParam struct { +// milvuspb.LoadCollectionRequest +// } +// +// type ReleaseCollectionParam struct { +// milvuspb.ReleaseCollectionRequest +// } +// +// type CreateDataBaseParam struct { +// milvuspb.CreateDatabaseRequest +// } +// +// type DropDataBaseParam struct { +// milvuspb.DropDatabaseRequest +// } +// +// type ReplicateMessageParam struct { +// ChannelName string +// BeginTs, EndTs uint64 +// MsgsBytes [][]byte +// StartPositions, EndPositions []*msgpb.MsgPosition +// } diff --git a/core/writer/milvus_api.go b/core/writer/milvus_api.go index 8673f755..1eb1b411 100644 --- a/core/writer/milvus_api.go +++ b/core/writer/milvus_api.go @@ -16,56 +16,59 @@ package writer -import ( - "context" - - "github.com/zilliztech/milvus-cdc/core/util" - - "github.com/milvus-io/milvus-sdk-go/v2/client" - "github.com/milvus-io/milvus-sdk-go/v2/entity" -) - -//go:generate mockery --name=MilvusClientAPI --filename=milvus_client_api_mock.go --output=../mocks -type MilvusClientAPI interface { - CreateCollection(ctx context.Context, schema *entity.Schema, shardsNum int32, opts ...client.CreateCollectionOption) error - DropCollection(ctx context.Context, collName string) error - Insert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) - DeleteByPks(ctx context.Context, collName string, partitionName string, ids entity.Column) error - CreatePartition(ctx context.Context, collName string, partitionName string) error - DropPartition(ctx context.Context, collName string, partitionName string) error - - CreateIndex(ctx context.Context, collName string, fieldName string, idx entity.Index, async bool, opts ...client.IndexOption) error - DropIndex(ctx context.Context, collName string, fieldName string, opts ...client.IndexOption) error - LoadCollection(ctx context.Context, collName string, async bool, opts ...client.LoadCollectionOption) error - ReleaseCollection(ctx context.Context, collName string) error - CreateDatabase(ctx context.Context, dbName string) error - DropDatabase(ctx context.Context, dbName string) error -} - -//go:generate mockery --name=MilvusClientFactory --filename=milvus_client_factory_mock.go --output=../mocks -type MilvusClientFactory interface { - util.CDCMark - NewGrpcClientWithTLSAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) - NewGrpcClientWithAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) - NewGrpcClient(ctx context.Context, addr string) (MilvusClientAPI, error) -} - -type DefaultMilvusClientFactory struct { - util.CDCMark -} - -func NewDefaultMilvusClientFactory() MilvusClientFactory { - return &DefaultMilvusClientFactory{} -} - -func (d 
*DefaultMilvusClientFactory) NewGrpcClientWithTLSAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) { - return client.NewDefaultGrpcClientWithTLSAuth(ctx, addr, username, password) -} - -func (d *DefaultMilvusClientFactory) NewGrpcClientWithAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) { - return client.NewDefaultGrpcClientWithAuth(ctx, addr, username, password) -} - -func (d *DefaultMilvusClientFactory) NewGrpcClient(ctx context.Context, addr string) (MilvusClientAPI, error) { - return client.NewDefaultGrpcClient(ctx, addr) -} +// +// import ( +// "context" +// +// "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" +// "github.com/milvus-io/milvus-sdk-go/v2/client" +// "github.com/milvus-io/milvus-sdk-go/v2/entity" +// +// "github.com/zilliztech/milvus-cdc/core/util" +// ) +// +// type MilvusClientAPI interface { +// CreateCollection(ctx context.Context, schema *entity.Schema, shardsNum int32, opts ...client.CreateCollectionOption) error +// DropCollection(ctx context.Context, collName string) error +// Insert(ctx context.Context, collName string, partitionName string, columns ...entity.Column) (entity.Column, error) +// DeleteByPks(ctx context.Context, collName string, partitionName string, ids entity.Column) error +// CreatePartition(ctx context.Context, collName string, partitionName string) error +// DropPartition(ctx context.Context, collName string, partitionName string) error +// +// CreateIndex(ctx context.Context, collName string, fieldName string, idx entity.Index, async bool, opts ...client.IndexOption) error +// DropIndex(ctx context.Context, collName string, fieldName string, opts ...client.IndexOption) error +// LoadCollection(ctx context.Context, collName string, async bool, opts ...client.LoadCollectionOption) error +// ReleaseCollection(ctx context.Context, collName string) error +// CreateDatabase(ctx context.Context, dbName string) error +// DropDatabase(ctx context.Context, dbName string) error +// ReplicateMessage(ctx context.Context, +// channelName string, beginTs, endTs uint64, +// msgsBytes [][]byte, startPositions, endPositions []*msgpb.MsgPosition) error +// } +// +// type MilvusClientFactory interface { +// util.CDCMark +// NewGrpcClientWithTLSAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) +// NewGrpcClientWithAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) +// NewGrpcClient(ctx context.Context, addr string) (MilvusClientAPI, error) +// } +// +// type DefaultMilvusClientFactory struct { +// util.CDCMark +// } +// +// func NewDefaultMilvusClientFactory() MilvusClientFactory { +// return &DefaultMilvusClientFactory{} +// } +// +// func (d *DefaultMilvusClientFactory) NewGrpcClientWithTLSAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) { +// return client.NewDefaultGrpcClientWithTLSAuth(ctx, addr, username, password) +// } +// +// func (d *DefaultMilvusClientFactory) NewGrpcClientWithAuth(ctx context.Context, addr, username, password string) (MilvusClientAPI, error) { +// return client.NewDefaultGrpcClientWithAuth(ctx, addr, username, password) +// } +// +// func (d *DefaultMilvusClientFactory) NewGrpcClient(ctx context.Context, addr string) (MilvusClientAPI, error) { +// return client.NewDefaultGrpcClient(ctx, addr) +// } diff --git a/core/writer/milvus_handler.go b/core/writer/milvus_handler.go index 2b86b1fd..81d36b61 100644 --- a/core/writer/milvus_handler.go +++ 
b/core/writer/milvus_handler.go @@ -23,13 +23,16 @@ import ( "github.com/milvus-io/milvus-sdk-go/v2/client" "github.com/milvus-io/milvus-sdk-go/v2/entity" + "github.com/milvus-io/milvus/pkg/log" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" "github.com/zilliztech/milvus-cdc/core/config" "github.com/zilliztech/milvus-cdc/core/util" - "go.uber.org/zap" ) type MilvusDataHandler struct { - DefaultDataHandler + api.DataHandler address string username string @@ -38,16 +41,16 @@ type MilvusDataHandler struct { ignorePartition bool // sometimes the has partition api is a deny api connectTimeout int - factory MilvusClientFactory + // factory MilvusClientFactory // TODO support db - milvus MilvusClientAPI + milvus client.Client } // NewMilvusDataHandler options must include AddressOption func NewMilvusDataHandler(options ...config.Option[*MilvusDataHandler]) (*MilvusDataHandler, error) { handler := &MilvusDataHandler{ connectTimeout: 5, - factory: NewDefaultMilvusClientFactory(), + // factory: NewDefaultMilvusClientFactory(), } for _, option := range options { option.Apply(handler) @@ -60,16 +63,22 @@ func NewMilvusDataHandler(options ...config.Option[*MilvusDataHandler]) (*Milvus timeoutContext, cancel := context.WithTimeout(context.Background(), time.Duration(handler.connectTimeout)*time.Second) defer cancel() - switch { - case handler.username != "" && handler.enableTLS: - handler.milvus, err = handler.factory.NewGrpcClientWithTLSAuth(timeoutContext, - handler.address, handler.username, handler.password) - case handler.username != "": - handler.milvus, err = handler.factory.NewGrpcClientWithAuth(timeoutContext, - handler.address, handler.username, handler.password) - default: - handler.milvus, err = handler.factory.NewGrpcClient(timeoutContext, handler.address) - } + // switch { + // case handler.username != "" && handler.enableTLS: + // handler.milvus, err = handler.factory.NewGrpcClientWithTLSAuth(timeoutContext, + // handler.address, handler.username, handler.password) + // case handler.username != "": + // handler.milvus, err = handler.factory.NewGrpcClientWithAuth(timeoutContext, + // handler.address, handler.username, handler.password) + // default: + // handler.milvus, err = handler.factory.NewGrpcClient(timeoutContext, handler.address) + // } + handler.milvus, err = client.NewClient(timeoutContext, client.Config{ + Address: handler.address, + Username: handler.username, + Password: handler.password, + EnableTLSAuth: handler.enableTLS, + }) if err != nil { log.Warn("fail to new the milvus client", zap.Error(err)) return nil, err @@ -77,7 +86,7 @@ func NewMilvusDataHandler(options ...config.Option[*MilvusDataHandler]) (*Milvus return handler, nil } -func (m *MilvusDataHandler) CreateCollection(ctx context.Context, param *CreateCollectionParam) error { +func (m *MilvusDataHandler) CreateCollection(ctx context.Context, param *api.CreateCollectionParam) error { var options []client.CreateCollectionOption for _, property := range param.Properties { options = append(options, client.WithCollectionProperty(property.GetKey(), property.GetValue())) @@ -86,11 +95,11 @@ func (m *MilvusDataHandler) CreateCollection(ctx context.Context, param *CreateC return m.milvus.CreateCollection(ctx, param.Schema, param.ShardsNum, options...) 
} -func (m *MilvusDataHandler) DropCollection(ctx context.Context, param *DropCollectionParam) error { +func (m *MilvusDataHandler) DropCollection(ctx context.Context, param *api.DropCollectionParam) error { return m.milvus.DropCollection(ctx, param.CollectionName) } -func (m *MilvusDataHandler) Insert(ctx context.Context, param *InsertParam) error { +func (m *MilvusDataHandler) Insert(ctx context.Context, param *api.InsertParam) error { partitionName := param.PartitionName if m.ignorePartition { partitionName = "" @@ -99,7 +108,7 @@ func (m *MilvusDataHandler) Insert(ctx context.Context, param *InsertParam) erro return err } -func (m *MilvusDataHandler) Delete(ctx context.Context, param *DeleteParam) error { +func (m *MilvusDataHandler) Delete(ctx context.Context, param *api.DeleteParam) error { partitionName := param.PartitionName if m.ignorePartition { partitionName = "" @@ -107,43 +116,48 @@ func (m *MilvusDataHandler) Delete(ctx context.Context, param *DeleteParam) erro return m.milvus.DeleteByPks(ctx, param.CollectionName, partitionName, param.Column) } -func (m *MilvusDataHandler) CreatePartition(ctx context.Context, param *CreatePartitionParam) error { +func (m *MilvusDataHandler) CreatePartition(ctx context.Context, param *api.CreatePartitionParam) error { if m.ignorePartition { return nil } return m.milvus.CreatePartition(ctx, param.CollectionName, param.PartitionName) } -func (m *MilvusDataHandler) DropPartition(ctx context.Context, param *DropPartitionParam) error { +func (m *MilvusDataHandler) DropPartition(ctx context.Context, param *api.DropPartitionParam) error { if m.ignorePartition { return nil } return m.milvus.DropPartition(ctx, param.CollectionName, param.PartitionName) } -func (m *MilvusDataHandler) CreateIndex(ctx context.Context, param *CreateIndexParam) error { +func (m *MilvusDataHandler) CreateIndex(ctx context.Context, param *api.CreateIndexParam) error { indexEntity := entity.NewGenericIndex(param.IndexName, "", util.ConvertKVPairToMap(param.ExtraParams)) return m.milvus.CreateIndex(ctx, param.CollectionName, param.FieldName, indexEntity, true, client.WithIndexName(param.IndexName)) } -func (m *MilvusDataHandler) DropIndex(ctx context.Context, param *DropIndexParam) error { +func (m *MilvusDataHandler) DropIndex(ctx context.Context, param *api.DropIndexParam) error { return m.milvus.DropIndex(ctx, param.CollectionName, param.FieldName, client.WithIndexName(param.IndexName)) } -func (m *MilvusDataHandler) LoadCollection(ctx context.Context, param *LoadCollectionParam) error { +func (m *MilvusDataHandler) LoadCollection(ctx context.Context, param *api.LoadCollectionParam) error { // TODO resource group - //return m.milvus.LoadCollection(ctx, param.CollectionName, true, client.WithReplicaNumber(param.ReplicaNumber), client.WithResourceGroups(param.ResourceGroups)) + // return m.milvus.LoadCollection(ctx, param.CollectionName, true, client.WithReplicaNumber(param.ReplicaNumber), client.WithResourceGroups(param.ResourceGroups)) return m.milvus.LoadCollection(ctx, param.CollectionName, true, client.WithReplicaNumber(param.ReplicaNumber)) } -func (m *MilvusDataHandler) ReleaseCollection(ctx context.Context, param *ReleaseCollectionParam) error { +func (m *MilvusDataHandler) ReleaseCollection(ctx context.Context, param *api.ReleaseCollectionParam) error { return m.milvus.ReleaseCollection(ctx, param.CollectionName) } -func (m *MilvusDataHandler) CreateDatabase(ctx context.Context, param *CreateDataBaseParam) error { +func (m *MilvusDataHandler) CreateDatabase(ctx 
context.Context, param *api.CreateDataBaseParam) error { return m.milvus.CreateDatabase(ctx, param.DbName) } -func (m *MilvusDataHandler) DropDatabase(ctx context.Context, param *DropDataBaseParam) error { +func (m *MilvusDataHandler) DropDatabase(ctx context.Context, param *api.DropDataBaseParam) error { return m.milvus.DropDatabase(ctx, param.DbName) } + +func (m *MilvusDataHandler) ReplicateMessage(ctx context.Context, param *api.ReplicateMessageParam) error { + _, err := m.milvus.ReplicateMessage(ctx, param.ChannelName, param.BeginTs, param.EndTs, param.MsgsBytes, param.StartPositions, param.EndPositions) + return err +} diff --git a/core/writer/milvus_handler_test.go b/core/writer/milvus_handler_test.go index b5b81800..d72ed9a4 100644 --- a/core/writer/milvus_handler_test.go +++ b/core/writer/milvus_handler_test.go @@ -16,170 +16,171 @@ package writer_test -import ( - "context" - "testing" - - "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-sdk-go/v2/entity" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/zilliztech/milvus-cdc/core/mocks" - "github.com/zilliztech/milvus-cdc/core/writer" -) - -var ( - address = "localhost:19530" - user = "foo" - password = "123456" - addressOption = writer.AddressOption(address) - userOption = writer.UserOption(user, password) - tlsOption = writer.TLSOption(true) - timeoutOption = writer.ConnectTimeoutOption(10) - ignorePartition = writer.IgnorePartitionOption(true) -) - -func TestNewMilvusDataHandler(t *testing.T) { - _, err := writer.NewMilvusDataHandler() - assert.Error(t, err) - - mockMilvusFactory := mocks.NewMilvusClientFactory(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) - t.Run("success tls", func(t *testing.T) { - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - - _, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, factoryOption) - assert.NoError(t, err) - }) - - t.Run("success no tls", func(t *testing.T) { - call := mockMilvusFactory.On("NewGrpcClientWithAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - - _, err := writer.NewMilvusDataHandler(addressOption, userOption, factoryOption) - assert.NoError(t, err) - }) - - t.Run("success no user", func(t *testing.T) { - call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(mockMilvusClient, nil) - defer call.Unset() - - _, err := writer.NewMilvusDataHandler(addressOption, factoryOption) - assert.NoError(t, err) - }) - - t.Run("fail", func(t *testing.T) { - call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(nil, errors.New("client error")) - defer call.Unset() - - _, err := writer.NewMilvusDataHandler(addressOption, factoryOption) - assert.Error(t, err) - }) -} - -func TestMilvusOp(t *testing.T) { - mockMilvusFactory := mocks.NewMilvusClientFactory(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) - call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(mockMilvusClient, nil) - defer call.Unset() - - handler, err := writer.NewMilvusDataHandler(addressOption, ignorePartition, factoryOption) - assert.NoError(t, err) - - t.Run("create collection", func(t *testing.T) { - schema := &entity.Schema{} - var shardNum 
int32 = 5 - level := commonpb.ConsistencyLevel_Session - kv := &commonpb.KeyValuePair{Key: "foo", Value: "111"} - param := &writer.CreateCollectionParam{ - Schema: schema, - ShardsNum: shardNum, - ConsistencyLevel: level, - Properties: []*commonpb.KeyValuePair{kv}, - } - - createCall := mockMilvusClient.On("CreateCollection", mock.Anything, schema, shardNum, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { - assert.Len(t, args, 5) - }).Return(nil) - err := handler.CreateCollection(context.Background(), param) - assert.NoError(t, err) - createCall.Unset() - - createCall = mockMilvusClient.On("CreateCollection", mock.Anything, schema, shardNum, mock.Anything, mock.Anything).Return(errors.New("crete error")) - err = handler.CreateCollection(context.Background(), param) - assert.Error(t, err) - createCall.Unset() - }) - - t.Run("drop collection", func(t *testing.T) { - name := "foo" - param := &writer.DropCollectionParam{CollectionName: name} - dropCall := mockMilvusClient.On("DropCollection", mock.Anything, name).Return(nil) - err := handler.DropCollection(context.Background(), param) - assert.NoError(t, err) - dropCall.Unset() - - dropCall = mockMilvusClient.On("DropCollection", mock.Anything, name).Return(errors.New("drop error")) - err = handler.DropCollection(context.Background(), param) - assert.Error(t, err) - dropCall.Unset() - }) - - t.Run("insert", func(t *testing.T) { - collectionName := "col" - partitionName := "par" - column := entity.NewColumnBool("ok", []bool{true, false, true}) - param := &writer.InsertParam{ - CollectionName: collectionName, - PartitionName: partitionName, - Columns: []entity.Column{column}, - } - insertCall := mockMilvusClient.On("Insert", mock.Anything, collectionName, "", column).Return(nil, nil) - err := handler.Insert(context.Background(), param) - assert.NoError(t, err) - insertCall.Unset() - - insertCall = mockMilvusClient.On("Insert", mock.Anything, collectionName, "", column).Return(nil, errors.New("insert error")) - err = handler.Insert(context.Background(), param) - assert.Error(t, err) - insertCall.Unset() - - handler2, err := writer.NewMilvusDataHandler(addressOption, factoryOption) - assert.NoError(t, err) - insertCall = mockMilvusClient.On("Insert", mock.Anything, collectionName, partitionName, column).Return(nil, nil) - err = handler2.Insert(context.Background(), param) - assert.NoError(t, err) - insertCall.Unset() - }) - - t.Run("delete", func(t *testing.T) { - collectionName := "col" - partitionName := "par" - column := entity.NewColumnString("ok", []string{"a", "b", "c"}) - param := &writer.DeleteParam{ - CollectionName: collectionName, - PartitionName: partitionName, - Column: column, - } - deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, "", column).Return(nil) - err := handler.Delete(context.Background(), param) - assert.NoError(t, err) - deleteCall.Unset() - - deleteCall = mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, "", column).Return(errors.New("delete error")) - err = handler.Delete(context.Background(), param) - assert.Error(t, err) - deleteCall.Unset() - - handler2, err := writer.NewMilvusDataHandler(addressOption, factoryOption) - assert.NoError(t, err) - deleteCall = mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, partitionName, column).Return(nil) - err = handler2.Delete(context.Background(), param) - assert.NoError(t, err) - deleteCall.Unset() - }) -} +// import ( +// "context" +// "testing" +// +// "github.com/cockroachdb/errors" +// 
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus-sdk-go/v2/entity" +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/mock" +// +// "github.com/zilliztech/milvus-cdc/core/mocks" +// "github.com/zilliztech/milvus-cdc/core/writer" +// ) +// +// var ( +// address = "localhost:19530" +// user = "foo" +// password = "123456" +// addressOption = writer.AddressOption(address) +// userOption = writer.UserOption(user, password) +// tlsOption = writer.TLSOption(true) +// timeoutOption = writer.ConnectTimeoutOption(10) +// ignorePartition = writer.IgnorePartitionOption(true) +// ) +// +// func TestNewMilvusDataHandler(t *testing.T) { +// _, err := writer.NewMilvusDataHandler() +// assert.Error(t, err) +// +// mockMilvusFactory := mocks.NewMilvusClientFactory(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) +// t.Run("success tls", func(t *testing.T) { +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// _, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, factoryOption) +// assert.NoError(t, err) +// }) +// +// t.Run("success no tls", func(t *testing.T) { +// call := mockMilvusFactory.On("NewGrpcClientWithAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// _, err := writer.NewMilvusDataHandler(addressOption, userOption, factoryOption) +// assert.NoError(t, err) +// }) +// +// t.Run("success no user", func(t *testing.T) { +// call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// _, err := writer.NewMilvusDataHandler(addressOption, factoryOption) +// assert.NoError(t, err) +// }) +// +// t.Run("fail", func(t *testing.T) { +// call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(nil, errors.New("client error")) +// defer call.Unset() +// +// _, err := writer.NewMilvusDataHandler(addressOption, factoryOption) +// assert.Error(t, err) +// }) +// } +// +// func TestMilvusOp(t *testing.T) { +// mockMilvusFactory := mocks.NewMilvusClientFactory(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) +// call := mockMilvusFactory.On("NewGrpcClient", mock.Anything, address).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// handler, err := writer.NewMilvusDataHandler(addressOption, ignorePartition, factoryOption) +// assert.NoError(t, err) +// +// t.Run("create collection", func(t *testing.T) { +// schema := &entity.Schema{} +// var shardNum int32 = 5 +// level := commonpb.ConsistencyLevel_Session +// kv := &commonpb.KeyValuePair{Key: "foo", Value: "111"} +// param := &writer.CreateCollectionParam{ +// Schema: schema, +// ShardsNum: shardNum, +// ConsistencyLevel: level, +// Properties: []*commonpb.KeyValuePair{kv}, +// } +// +// createCall := mockMilvusClient.On("CreateCollection", mock.Anything, schema, shardNum, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { +// assert.Len(t, args, 5) +// }).Return(nil) +// err := handler.CreateCollection(context.Background(), param) +// assert.NoError(t, err) +// createCall.Unset() +// +// createCall = mockMilvusClient.On("CreateCollection", mock.Anything, schema, shardNum, mock.Anything, mock.Anything).Return(errors.New("crete error")) +// err = 
handler.CreateCollection(context.Background(), param) +// assert.Error(t, err) +// createCall.Unset() +// }) +// +// t.Run("drop collection", func(t *testing.T) { +// name := "foo" +// param := &writer.DropCollectionParam{CollectionName: name} +// dropCall := mockMilvusClient.On("DropCollection", mock.Anything, name).Return(nil) +// err := handler.DropCollection(context.Background(), param) +// assert.NoError(t, err) +// dropCall.Unset() +// +// dropCall = mockMilvusClient.On("DropCollection", mock.Anything, name).Return(errors.New("drop error")) +// err = handler.DropCollection(context.Background(), param) +// assert.Error(t, err) +// dropCall.Unset() +// }) +// +// t.Run("insert", func(t *testing.T) { +// collectionName := "col" +// partitionName := "par" +// column := entity.NewColumnBool("ok", []bool{true, false, true}) +// param := &writer.InsertParam{ +// CollectionName: collectionName, +// PartitionName: partitionName, +// Columns: []entity.Column{column}, +// } +// insertCall := mockMilvusClient.On("Insert", mock.Anything, collectionName, "", column).Return(nil, nil) +// err := handler.Insert(context.Background(), param) +// assert.NoError(t, err) +// insertCall.Unset() +// +// insertCall = mockMilvusClient.On("Insert", mock.Anything, collectionName, "", column).Return(nil, errors.New("insert error")) +// err = handler.Insert(context.Background(), param) +// assert.Error(t, err) +// insertCall.Unset() +// +// handler2, err := writer.NewMilvusDataHandler(addressOption, factoryOption) +// assert.NoError(t, err) +// insertCall = mockMilvusClient.On("Insert", mock.Anything, collectionName, partitionName, column).Return(nil, nil) +// err = handler2.Insert(context.Background(), param) +// assert.NoError(t, err) +// insertCall.Unset() +// }) +// +// t.Run("delete", func(t *testing.T) { +// collectionName := "col" +// partitionName := "par" +// column := entity.NewColumnString("ok", []string{"a", "b", "c"}) +// param := &writer.DeleteParam{ +// CollectionName: collectionName, +// PartitionName: partitionName, +// Column: column, +// } +// deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, "", column).Return(nil) +// err := handler.Delete(context.Background(), param) +// assert.NoError(t, err) +// deleteCall.Unset() +// +// deleteCall = mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, "", column).Return(errors.New("delete error")) +// err = handler.Delete(context.Background(), param) +// assert.Error(t, err) +// deleteCall.Unset() +// +// handler2, err := writer.NewMilvusDataHandler(addressOption, factoryOption) +// assert.NoError(t, err) +// deleteCall = mockMilvusClient.On("DeleteByPks", mock.Anything, collectionName, partitionName, column).Return(nil) +// err = handler2.Delete(context.Background(), param) +// assert.NoError(t, err) +// deleteCall.Unset() +// }) +// } diff --git a/core/writer/msg_size.go b/core/writer/msg_size.go index 3d74bf4a..65aa28b1 100644 --- a/core/writer/msg_size.go +++ b/core/writer/msg_size.go @@ -20,9 +20,11 @@ import ( "encoding/binary" "github.com/milvus-io/milvus-sdk-go/v2/entity" + "github.com/milvus-io/milvus/pkg/log" "github.com/milvus-io/milvus/pkg/mq/msgstream" - "github.com/zilliztech/milvus-cdc/core/util" "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/util" ) func SizeOfInsertMsg(msg *msgstream.InsertMsg) int64 { @@ -39,17 +41,17 @@ func SizeOfInsertMsg(msg *msgstream.InsertMsg) int64 { for _, fieldData := range msg.FieldsData { if column, err := entity.FieldDataColumn(fieldData, 0, -1); err 
== nil { if !sizeFunc(column) { - util.Log.Warn("insert msg, fail to get the data size", zap.String("name", column.Name())) + log.Warn("insert msg, fail to get the data size", zap.String("name", column.Name())) return -1 } } else { column, err := entity.FieldDataVector(fieldData) if err != nil { - util.Log.Warn("fail to get the data size", zap.Any("msg", msg), zap.Error(err)) + log.Warn("fail to get the data size", zap.Any("msg", msg), zap.Error(err)) return -1 } if !sizeFunc(column) { - util.Log.Warn("insert msg, fail to get the data size", zap.String("name", column.Name())) + log.Warn("insert msg, fail to get the data size", zap.String("name", column.Name())) return -1 } } @@ -61,11 +63,11 @@ func SizeOfDeleteMsg(msg *msgstream.DeleteMsg) int64 { var totalSize int64 column, err := entity.IDColumns(msg.PrimaryKeys, 0, -1) if err != nil { - util.Log.Warn("fail to get the id columns", zap.Any("msg", msg), zap.Error(err)) + log.Warn("fail to get the id columns", zap.Any("msg", msg), zap.Error(err)) return -1 } if totalSize = SizeColumn(column); totalSize < 0 { - util.Log.Warn("delete msg, fail to get the data size", zap.String("name", column.Name())) + log.Warn("delete msg, fail to get the data size", zap.String("name", column.Name())) return -1 } return totalSize @@ -117,7 +119,7 @@ func SizeColumn(column entity.Column) int64 { } return int64(total) default: - util.Log.Warn("invalid type", zap.Any("column", column)) + log.Warn("invalid type", zap.Any("column", column)) return -1 } return int64(binary.Size(data)) diff --git a/core/writer/replicate_message_manager.go b/core/writer/replicate_message_manager.go new file mode 100644 index 00000000..4f5c48c1 --- /dev/null +++ b/core/writer/replicate_message_manager.go @@ -0,0 +1,96 @@ +package writer + +import ( + "context" + "sync" + + "github.com/cockroachdb/errors" + + "github.com/zilliztech/milvus-cdc/core/api" + "github.com/zilliztech/milvus-cdc/core/util" +) + +// replicateMessageManager For the same channel, it is unsafe in concurrent situations +type replicateMessageManager struct { + handler api.DataHandler + messageHandlerMap util.Map[string, *replicateMessageHandler] + messageBufferSize int +} + +func NewReplicateMessageManager(handler api.DataHandler, messageBufferSize int) api.MessageManager { + manager := &replicateMessageManager{ + handler: handler, + messageBufferSize: messageBufferSize, + } + return manager +} + +func (r *replicateMessageManager) ReplicateMessage(message *api.ReplicateMessage) { + channelName := message.Param.ChannelName + handler, _ := r.messageHandlerMap.LoadOrStore(channelName, newReplicateMessageHandler(channelName, r.messageBufferSize, r.handler)) + handler.handleMessage(message) +} + +func (r *replicateMessageManager) Close(channelName string) { + if handler, ok := r.messageHandlerMap.Load(channelName); ok { + handler.close() + } +} + +type replicateMessageHandler struct { + channelName string + handler api.DataHandler + messageChan chan *api.ReplicateMessage + stopOnce sync.Once + stopChan chan struct{} +} + +func (r *replicateMessageHandler) startHandleMessageLoop() { + go func() { + for { + message, ok := <-r.messageChan + if !ok { + return + } + messageParam := message.Param + // for _, position := range messageParam.StartPositions { + // position.ChannelName = messageParam.ChannelName + // } + // for _, position := range messageParam.EndPositions { + // position.ChannelName = messageParam.ChannelName + // } + err := r.handler.ReplicateMessage(context.Background(), messageParam) + if err != nil { + 
message.FailFunc(message.Param, err) + } else { + message.SuccessFunc(message.Param) + } + } + }() +} + +func (r *replicateMessageHandler) handleMessage(message *api.ReplicateMessage) { + select { + case <-r.stopChan: + message.FailFunc(message.Param, errors.New("replicate message handler is closed")) + case r.messageChan <- message: + } +} + +func (r *replicateMessageHandler) close() { + r.stopOnce.Do(func() { + close(r.stopChan) + }) +} + +func newReplicateMessageHandler(channelName string, messageBufferSize int, handler api.DataHandler) *replicateMessageHandler { + paramChan := make(chan *api.ReplicateMessage, messageBufferSize) + replicateHandler := &replicateMessageHandler{ + channelName: channelName, + handler: handler, + messageChan: paramChan, + stopChan: make(chan struct{}), + } + replicateHandler.startHandleMessageLoop() + return replicateHandler +} diff --git a/core/writer/writer_api.go b/core/writer/writer_api.go index eb6e3fec..47728425 100644 --- a/core/writer/writer_api.go +++ b/core/writer/writer_api.go @@ -16,73 +16,68 @@ package writer -import ( - "context" - - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/util" -) - -//go:generate mockery --name=CDCWriter --filename=cdc_writer_mock.go --output=../mocks -type CDCWriter interface { - util.CDCMark - - // Write you MUST handle the error if the return value is not nil - Write(context context.Context, data *model.CDCData, callback WriteCallback) error - Flush(context context.Context) -} - -type DefaultWriter struct { - util.CDCMark -} - -func (d *DefaultWriter) Write(context context.Context, data *model.CDCData, callback WriteCallback) error { - return nil -} - -func (d *DefaultWriter) Flush(context context.Context) { -} - -type CallbackChannelInfo struct { - Position *commonpb.KeyDataPair - MsgType msgstream.MsgType - MsgRowCount int - Ts uint64 -} - -//go:generate mockery --name=WriteCallback --filename=write_callbakc_mock.go --output=../mocks -type WriteCallback interface { - util.CDCMark - - // lastPosition - OnFail(data *model.CDCData, err error) - - OnSuccess(collectionID int64, channelInfos map[string]CallbackChannelInfo) -} - -type DefaultWriteCallBack struct { - util.CDCMark -} - -func (d *DefaultWriteCallBack) OnFail(data *model.CDCData, err error) { -} - -func (d *DefaultWriteCallBack) OnSuccess(collectionID int64, channelInfos map[string]CallbackChannelInfo) { -} - -type NotifyCollectionPositionChangeFunc func(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) - -type BufferOp interface { - Apply(ctx context.Context, f NotifyCollectionPositionChangeFunc) -} - -type BufferOpFunc func(context.Context, NotifyCollectionPositionChangeFunc) - -func (b BufferOpFunc) Apply(ctx context.Context, f NotifyCollectionPositionChangeFunc) { - b(ctx, f) -} - -type CombinableBufferOpFunc struct { -} +// import ( +// "context" +// +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus/pkg/mq/msgstream" +// +// "github.com/zilliztech/milvus-cdc/core/model" +// "github.com/zilliztech/milvus-cdc/core/util" +// ) +// +// type CDCWriter interface { +// // Write you MUST handle the error if the return value is not nil +// Write(context context.Context, data *model.CDCData, callback WriteCallback) error +// Flush(context context.Context) +// } +// +// type DefaultWriter struct { +// util.CDCMark +// } +// +// func 
(d *DefaultWriter) Write(context context.Context, data *model.CDCData, callback WriteCallback) error { +// return nil +// } +// +// func (d *DefaultWriter) Flush(context context.Context) { +// } +// +// type CallbackChannelInfo struct { +// Position *commonpb.KeyDataPair +// MsgType msgstream.MsgType +// MsgRowCount int +// Ts uint64 +// } +// +// type WriteCallback interface { +// // lastPosition +// OnFail(data *model.CDCData, err error) +// +// OnSuccess(collectionID int64, channelInfos map[string]CallbackChannelInfo) +// } +// +// type DefaultWriteCallBack struct { +// util.CDCMark +// } +// +// func (d *DefaultWriteCallBack) OnFail(data *model.CDCData, err error) { +// } +// +// func (d *DefaultWriteCallBack) OnSuccess(collectionID int64, channelInfos map[string]CallbackChannelInfo) { +// } +// +// type NotifyCollectionPositionChangeFunc func(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) +// +// type BufferOp interface { +// Apply(ctx context.Context, f NotifyCollectionPositionChangeFunc) +// } +// +// type BufferOpFunc func(context.Context, NotifyCollectionPositionChangeFunc) +// +// func (b BufferOpFunc) Apply(ctx context.Context, f NotifyCollectionPositionChangeFunc) { +// b(ctx, f) +// } +// +// type CombinableBufferOpFunc struct { +// } diff --git a/core/writer/writer_template.go b/core/writer/writer_template.go index 956553e8..738f372c 100644 --- a/core/writer/writer_template.go +++ b/core/writer/writer_template.go @@ -16,901 +16,902 @@ package writer -import ( - "context" - "encoding/json" - "fmt" - "math/rand" - "sync" - "time" - - "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus-sdk-go/v2/entity" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - "github.com/samber/lo" - "github.com/zilliztech/milvus-cdc/core/config" - "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/util" - "go.uber.org/zap" -) - -var log = util.Log - -type BufferConfig struct { - Period time.Duration - Size int64 -} - -var DefaultBufferConfig = BufferConfig{ - Period: 1 * time.Minute, - Size: 1024 * 1024, -} - -var NoBufferConfig = BufferConfig{ - Period: 0, - Size: -1, -} - -type CDCWriterTemplate struct { - DefaultWriter - - handler CDCDataHandler - errProtect *ErrorProtect - funcMap map[msgstream.MsgType]func(context.Context, *model.CDCData, WriteCallback) - - bufferConfig BufferConfig - bufferLock sync.Mutex - currentBufferSize int64 - bufferOps []BufferOp - bufferUpdatePositionFunc NotifyCollectionPositionChangeFunc - bufferOpsChan chan []BufferOp - - bufferData []lo.Tuple2[*model.CDCData, WriteCallback] - bufferDataChan chan []lo.Tuple2[*model.CDCData, WriteCallback] -} - -// NewCDCWriterTemplate options must include HandlerOption -func NewCDCWriterTemplate(options ...config.Option[*CDCWriterTemplate]) CDCWriter { - c := &CDCWriterTemplate{ - bufferConfig: DefaultBufferConfig, - errProtect: FastFail(), - } - for _, option := range options { - option.Apply(c) - } - c.funcMap = map[msgstream.MsgType]func(context.Context, *model.CDCData, WriteCallback){ - commonpb.MsgType_CreateCollection: c.handleCreateCollection, - commonpb.MsgType_DropCollection: c.handleDropCollection, - commonpb.MsgType_Insert: c.handleInsert, - commonpb.MsgType_Delete: c.handleDelete, - commonpb.MsgType_CreatePartition: c.handleCreatePartition, - commonpb.MsgType_DropPartition: c.handleDropPartition, - 
commonpb.MsgType_CreateIndex: c.handleRPCRequest, - commonpb.MsgType_DropIndex: c.handleRPCRequest, - commonpb.MsgType_LoadCollection: c.handleRPCRequest, - commonpb.MsgType_ReleaseCollection: c.handleRPCRequest, - commonpb.MsgType_CreateDatabase: c.handleRPCRequest, - commonpb.MsgType_DropDatabase: c.handleRPCRequest, - } - c.initBuffer() - c.periodFlush() - return c -} - -func (c *CDCWriterTemplate) initBuffer() { - c.bufferDataChan = make(chan []lo.Tuple2[*model.CDCData, WriteCallback]) - - // execute buffer ops - go func() { - for { - select { - case <-c.errProtect.Chan(): - log.Warn("the error protection is triggered", zap.String("protect", c.errProtect.Info())) - return - default: - } - - latestPositions := make(map[int64]map[string]*commonpb.KeyDataPair) - collectionNames := make(map[int64]string) - positionFunc := NotifyCollectionPositionChangeFunc(func(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) { - if position == nil { - return - } - collectionNames[collectionID] = collectionName - collectionPositions, ok := latestPositions[collectionID] - if !ok { - collectionPositions = make(map[string]*commonpb.KeyDataPair) - latestPositions[collectionID] = collectionPositions - } - collectionPositions[pChannelName] = position - }) - - bufferData := <-c.bufferDataChan - combineDataMap := make(map[string][]*CombineData) - c.combineDataFunc(bufferData, combineDataMap, positionFunc) - executeSuccesses := func(successes []func()) { - for _, success := range successes { - success() - } - } - executeFails := func(fails []func(err error), err error) { - for _, fail := range fails { - fail(err) - } - } - - ctx := context.Background() - for _, combineDatas := range combineDataMap { - for _, combineData := range combineDatas { - var err error - switch p := combineData.param.(type) { - case *InsertParam: - err = c.handler.Insert(ctx, p) - case *DeleteParam: - err = c.handler.Delete(ctx, p) - case *DropCollectionParam: - err = c.handler.DropCollection(ctx, p) - case *DropPartitionParam: - err = c.handler.DropPartition(ctx, p) - case *CreateIndexParam: - err = c.handler.CreateIndex(ctx, p) - case *DropIndexParam: - err = c.handler.DropIndex(ctx, p) - case *LoadCollectionParam: - err = c.handler.LoadCollection(ctx, p) - case *ReleaseCollectionParam: - err = c.handler.ReleaseCollection(ctx, p) - case *CreateDataBaseParam: - err = c.handler.CreateDatabase(ctx, p) - case *DropDataBaseParam: - err = c.handler.DropDatabase(ctx, p) - default: - log.Warn("invalid param", zap.Any("data", combineData)) - continue - } - if err != nil { - executeFails(combineData.fails, err) - continue - } - executeSuccesses(combineData.successes) - } - } - - if c.bufferUpdatePositionFunc != nil { - for collectionID, collectionPositions := range latestPositions { - for pChannelName, position := range collectionPositions { - c.bufferUpdatePositionFunc(collectionID, collectionNames[collectionID], pChannelName, position) - } - } - } - } - }() -} - -type CombineData struct { - param any - fails []func(err error) - successes []func() -} - -func (c *CDCWriterTemplate) combineDataFunc(dataArr []lo.Tuple2[*model.CDCData, WriteCallback], - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc) { - - for _, tuple := range dataArr { - data := tuple.A - callback := tuple.B - switch msg := data.Msg.(type) { - case *msgstream.InsertMsg: - c.handleInsertBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.DeleteMsg: - 
c.handleDeleteBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.DropCollectionMsg: - c.handleDropCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.DropPartitionMsg: - c.handleDropPartitionBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.CreateIndexMsg: - c.handleCreateIndexBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.DropIndexMsg: - c.handleDropIndexBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.LoadCollectionMsg: - c.handleLoadCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.ReleaseCollectionMsg: - c.handleReleaseCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.CreateDatabaseMsg: - c.handleCreateDatabaseBuffer(msg, data, callback, combineDataMap, positionFunc) - case *msgstream.DropDatabaseMsg: - c.handleDropDatabaseBuffer(msg, data, callback, combineDataMap, positionFunc) - } - } -} - -func (c *CDCWriterTemplate) generateBufferKey(a string, b string) string { - return a + ":" + b -} - -func (c *CDCWriterTemplate) handleInsertBuffer(msg *msgstream.InsertMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - - collectionName := msg.CollectionName - partitionName := msg.PartitionName - dataKey := c.generateBufferKey(collectionName, partitionName) - // construct columns - var columns []entity.Column - for _, fieldData := range msg.FieldsData { - if column, err := entity.FieldDataColumn(fieldData, 0, -1); err == nil { - columns = append(columns, column) - } else { - column, err := entity.FieldDataVector(fieldData) - if err != nil { - c.fail("fail to parse the data", err, data, callback) - return - } - columns = append(columns, column) - } - } - // new combine data for convenient usage below - newCombineData := &CombineData{ - param: &InsertParam{ - CollectionName: collectionName, - PartitionName: partitionName, - Columns: columns, - }, - successes: []func(){ - func() { - c.success(msg.CollectionID, collectionName, len(msg.RowIDs), data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to insert the data", err, data, callback) - }, - }, - } - combineDataArr, ok := combineDataMap[dataKey] - // check whether the combineDataMap contains the key, if not, add the data - if !ok { - combineDataMap[dataKey] = []*CombineData{ - newCombineData, - } - return - } - lastCombineData := combineDataArr[len(combineDataArr)-1] - insertParam, ok := lastCombineData.param.(*InsertParam) - // check whether the last data is insert, if not, add the data to array - if !ok { - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) - return - } - // combine the data - if err := c.preCombineColumn(insertParam.Columns, columns); err != nil { - c.fail("fail to combine the data", err, data, callback) - return - } - c.combineColumn(insertParam.Columns, columns) - lastCombineData.successes = append(lastCombineData.successes, newCombineData.successes...) - lastCombineData.fails = append(lastCombineData.fails, newCombineData.fails...) 
-} - -func (c *CDCWriterTemplate) handleDeleteBuffer(msg *msgstream.DeleteMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - collectionName := msg.CollectionName - partitionName := msg.PartitionName - dataKey := c.generateBufferKey(collectionName, partitionName) - // get the id column - column, err := entity.IDColumns(msg.PrimaryKeys, 0, -1) - if err != nil { - c.fail("fail to get the id columns", err, data, callback) - return - } - newCombineData := &CombineData{ - param: &DeleteParam{ - CollectionName: collectionName, - PartitionName: partitionName, - Column: column, - }, - successes: []func(){ - func() { - c.success(msg.CollectionID, collectionName, int(msg.NumRows), data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to delete the column", err, data, callback) - }, - }, - } - combineDataArr, ok := combineDataMap[dataKey] - // check whether the combineDataMap contains the key, if not, add the data - if !ok { - combineDataMap[dataKey] = []*CombineData{ - newCombineData, - } - return - } - lastCombineData := combineDataArr[len(combineDataArr)-1] - deleteParam, ok := lastCombineData.param.(*DeleteParam) - // check whether the last data is insert, if not, add the data to array - if !ok { - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) - return - } - // combine the data - var values []interface{} - switch columnValue := column.(type) { - case *entity.ColumnInt64: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnVarChar: - for _, varchar := range columnValue.Data() { - values = append(values, varchar) - } - default: - c.fail("fail to combine the delete data", err, data, callback) - } - for _, value := range values { - err = deleteParam.Column.AppendValue(value) - if err != nil { - c.fail("fail to combine the delete data", err, data, callback) - return - } - } - lastCombineData.successes = append(lastCombineData.successes, newCombineData.successes...) - lastCombineData.fails = append(lastCombineData.fails, newCombineData.fails...) 
-} - -func (c *CDCWriterTemplate) handleDropCollectionBuffer(msg *msgstream.DropCollectionMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - collectionName := msg.CollectionName - dataKey := c.generateBufferKey(collectionName, "") - newCombineData := &CombineData{ - param: &DropCollectionParam{ - CollectionName: collectionName, - }, - successes: []func(){ - func() { - channelInfos := make(map[string]CallbackChannelInfo) - collectChannelInfo := func(dropCollectionMsg *msgstream.DropCollectionMsg) { - position := dropCollectionMsg.Position() - kd := &commonpb.KeyDataPair{ - Key: position.ChannelName, - Data: position.MsgID, - } - channelInfos[position.ChannelName] = CallbackChannelInfo{ - Position: kd, - Ts: dropCollectionMsg.EndTs(), - } - } - collectChannelInfo(msg) - if msgsValue := data.Extra[model.DropCollectionMsgsKey]; msgsValue != nil { - msgs := msgsValue.([]*msgstream.DropCollectionMsg) - for _, tsMsg := range msgs { - collectChannelInfo(tsMsg) - } - } - - callback.OnSuccess(msg.CollectionID, channelInfos) - if positionFunc != nil { - for _, info := range channelInfos { - positionFunc(msg.CollectionID, msg.CollectionName, info.Position.Key, info.Position) - } - } - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to drop collection", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleDropPartitionBuffer(msg *msgstream.DropPartitionMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - collectionName := msg.CollectionName - partitionName := msg.PartitionName - dataKey := c.generateBufferKey(collectionName, partitionName) - newCombineData := &CombineData{ - param: &DropPartitionParam{ - CollectionName: collectionName, - PartitionName: partitionName, - }, - successes: []func(){ - func() { - channelInfos := make(map[string]CallbackChannelInfo) - collectChannelInfo := func(dropPartitionMsg *msgstream.DropPartitionMsg) { - position := dropPartitionMsg.Position() - kd := &commonpb.KeyDataPair{ - Key: position.ChannelName, - Data: position.MsgID, - } - channelInfos[position.ChannelName] = CallbackChannelInfo{ - Position: kd, - Ts: dropPartitionMsg.EndTs(), - } - } - collectChannelInfo(msg) - if msgsValue := data.Extra[model.DropPartitionMsgsKey]; msgsValue != nil { - msgs := msgsValue.([]*msgstream.DropPartitionMsg) - for _, tsMsg := range msgs { - collectChannelInfo(tsMsg) - } - } - - callback.OnSuccess(msg.CollectionID, channelInfos) - if positionFunc != nil { - for _, info := range channelInfos { - positionFunc(msg.CollectionID, msg.CollectionName, info.Position.Key, info.Position) - } - } - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to drop collection", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleCreateIndexBuffer(msg *msgstream.CreateIndexMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("create_index_%s_%s_%d", msg.CollectionName, msg.IndexName, rand.Int()) - newCombineData := &CombineData{ - param: &CreateIndexParam{ - CreateIndexRequest: msg.CreateIndexRequest, - }, - successes: []func(){ - func() { - 
c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to create index", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleDropIndexBuffer(msg *msgstream.DropIndexMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("drop_index_%s_%s_%d", msg.CollectionName, msg.IndexName, rand.Int()) - newCombineData := &CombineData{ - param: &DropIndexParam{ - DropIndexRequest: msg.DropIndexRequest, - }, - successes: []func(){ - func() { - c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to drop index", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleLoadCollectionBuffer(msg *msgstream.LoadCollectionMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("load_collection_%s_%d", msg.CollectionName, rand.Int()) - newCombineData := &CombineData{ - param: &LoadCollectionParam{ - LoadCollectionRequest: msg.LoadCollectionRequest, - }, - successes: []func(){ - func() { - c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to load collection", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleReleaseCollectionBuffer(msg *msgstream.ReleaseCollectionMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("release_collection_%s_%d", msg.CollectionName, rand.Int()) - newCombineData := &CombineData{ - param: &ReleaseCollectionParam{ - ReleaseCollectionRequest: msg.ReleaseCollectionRequest, - }, - successes: []func(){ - func() { - c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to release collection", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleCreateDatabaseBuffer(msg *msgstream.CreateDatabaseMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("create_database_%s_%d", msg.DbName, rand.Int()) - newCombineData := &CombineData{ - param: &CreateDataBaseParam{ - CreateDatabaseRequest: msg.CreateDatabaseRequest, - }, - successes: []func(){ - func() { - c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to create database", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) handleDropDatabaseBuffer(msg *msgstream.DropDatabaseMsg, - data *model.CDCData, callback WriteCallback, - combineDataMap map[string][]*CombineData, - positionFunc NotifyCollectionPositionChangeFunc, -) { - dataKey := fmt.Sprintf("drop_database_%s_%d", msg.DbName, rand.Int()) - newCombineData := 
&CombineData{ - param: &DropDataBaseParam{ - DropDatabaseRequest: msg.DropDatabaseRequest, - }, - successes: []func(){ - func() { - c.rpcRequestSuccess(msg, data, callback, positionFunc) - }, - }, - fails: []func(err error){ - func(err error) { - c.fail("fail to drop database", err, data, callback) - }, - }, - } - combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) -} - -func (c *CDCWriterTemplate) rpcRequestSuccess(msg msgstream.TsMsg, data *model.CDCData, callback WriteCallback, positionFunc NotifyCollectionPositionChangeFunc) { - channelInfos := make(map[string]CallbackChannelInfo) - position := msg.Position() - info := CallbackChannelInfo{ - Position: &commonpb.KeyDataPair{ - Key: position.ChannelName, - Data: position.MsgID, - }, - Ts: msg.EndTs(), - } - channelInfos[position.ChannelName] = info - collectionID := util.RPCRequestCollectionID - collectionName := util.RPCRequestCollectionName - if value, ok := data.Extra[model.CollectionIDKey]; ok { - collectionID = value.(int64) - } - if value, ok := data.Extra[model.CollectionNameKey]; ok { - collectionName = value.(string) - } - callback.OnSuccess(collectionID, channelInfos) - if positionFunc != nil { - positionFunc(collectionID, collectionName, info.Position.Key, info.Position) - } -} - -func (c *CDCWriterTemplate) periodFlush() { - go func() { - if c.bufferConfig.Period <= 0 { - return - } - ticker := time.NewTicker(c.bufferConfig.Period) - for { - <-ticker.C - c.Flush(context.Background()) - } - }() -} - -func (c *CDCWriterTemplate) Write(ctx context.Context, data *model.CDCData, callback WriteCallback) error { - select { - case <-c.errProtect.Chan(): - log.Warn("the error protection is triggered", zap.String("protect", c.errProtect.Info())) - return errors.New("the error protection is triggered") - default: - } - - handleFunc, ok := c.funcMap[data.Msg.Type()] - if !ok { - // don't execute the fail callback, because the future messages will be ignored and don't trigger the error protection - log.Warn("not support message type", zap.Any("data", data)) - return fmt.Errorf("not support message type, type: %s", data.Msg.Type().String()) - } - handleFunc(ctx, data, callback) - return nil -} - -func (c *CDCWriterTemplate) Flush(context context.Context) { - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.clearBufferFunc() -} - -func (c *CDCWriterTemplate) handleCreateCollection(ctx context.Context, data *model.CDCData, callback WriteCallback) { - msg := data.Msg.(*msgstream.CreateCollectionMsg) - schema := &schemapb.CollectionSchema{} - err := json.Unmarshal(msg.Schema, schema) - if err != nil { - c.fail("fail to unmarshal the collection schema", err, data, callback) - return - } - var shardNum int32 - if value, ok := data.Extra[model.ShardNumKey]; ok { - shardNum = value.(int32) - } - level := commonpb.ConsistencyLevel_Strong - if value, ok := data.Extra[model.ConsistencyLevelKey]; ok { - level = value.(commonpb.ConsistencyLevel) - } - var properties []*commonpb.KeyValuePair - if value, ok := data.Extra[model.CollectionPropertiesKey]; ok { - properties = value.([]*commonpb.KeyValuePair) - } - - entitySchema := &entity.Schema{} - entitySchema = entitySchema.ReadProto(schema) - err = c.handler.CreateCollection(ctx, &CreateCollectionParam{ - Schema: entitySchema, - ShardsNum: shardNum, - ConsistencyLevel: level, - Properties: properties, - }) - if err != nil { - c.fail("fail to create the collection", err, data, callback) - return - } - callback.OnSuccess(msg.CollectionID, nil) -} - -func (c *CDCWriterTemplate) 
handleDropCollection(ctx context.Context, data *model.CDCData, callback WriteCallback) { - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.bufferData = append(c.bufferData, lo.T2(data, callback)) - c.clearBufferFunc() -} - -func (c *CDCWriterTemplate) handleInsert(ctx context.Context, data *model.CDCData, callback WriteCallback) { - msg := data.Msg.(*msgstream.InsertMsg) - totalSize := SizeOfInsertMsg(msg) - if totalSize < 0 { - c.fail("fail to get the data size", errors.New("invalid column type"), data, callback) - return - } - - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.currentBufferSize += totalSize - c.bufferData = append(c.bufferData, lo.T2(data, callback)) - c.checkBufferSize() -} - -func (c *CDCWriterTemplate) handleDelete(ctx context.Context, data *model.CDCData, callback WriteCallback) { - msg := data.Msg.(*msgstream.DeleteMsg) - totalSize := SizeOfDeleteMsg(msg) - - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.currentBufferSize += totalSize - c.bufferData = append(c.bufferData, lo.T2(data, callback)) - c.checkBufferSize() -} - -func (c *CDCWriterTemplate) handleCreatePartition(ctx context.Context, data *model.CDCData, callback WriteCallback) { - msg := data.Msg.(*msgstream.CreatePartitionMsg) - err := c.handler.CreatePartition(ctx, &CreatePartitionParam{ - CollectionName: msg.CollectionName, - PartitionName: msg.PartitionName, - }) - if err != nil { - c.fail("fail to create the partition", err, data, callback) - return - } - callback.OnSuccess(msg.CollectionID, nil) -} - -func (c *CDCWriterTemplate) handleDropPartition(ctx context.Context, data *model.CDCData, callback WriteCallback) { - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.bufferData = append(c.bufferData, lo.T2(data, callback)) - c.clearBufferFunc() -} - -func (c *CDCWriterTemplate) handleRPCRequest(ctx context.Context, data *model.CDCData, callback WriteCallback) { - c.bufferLock.Lock() - defer c.bufferLock.Unlock() - c.bufferData = append(c.bufferData, lo.T2(data, callback)) - c.clearBufferFunc() -} - -func (c *CDCWriterTemplate) collectionName(data *model.CDCData) string { - f, ok := data.Msg.(interface{ GetCollectionName() string }) - if ok { - return f.GetCollectionName() - } - return "" -} - -func (c *CDCWriterTemplate) partitionName(data *model.CDCData) string { - f, ok := data.Msg.(interface{ GetPartitionName() string }) - if ok { - return f.GetPartitionName() - } - return "" -} - -func (c *CDCWriterTemplate) fail(msg string, err error, data *model.CDCData, - callback WriteCallback, field ...zap.Field) { - - log.Warn(msg, append(field, - zap.String("collection_name", c.collectionName(data)), - zap.String("partition_name", c.partitionName(data)), - zap.Error(err))...) 
- callback.OnFail(data, errors.WithMessage(err, msg)) - c.errProtect.Inc() -} - -func (c *CDCWriterTemplate) success(collectionID int64, collectionName string, rowCount int, - data *model.CDCData, callback WriteCallback, positionFunc NotifyCollectionPositionChangeFunc) { - position := data.Msg.Position() - kd := &commonpb.KeyDataPair{ - Key: position.ChannelName, - Data: position.MsgID, - } - callback.OnSuccess(collectionID, map[string]CallbackChannelInfo{ - position.ChannelName: { - Position: kd, - MsgType: data.Msg.Type(), - MsgRowCount: rowCount, - Ts: data.Msg.EndTs(), - }, - }) - if positionFunc != nil { - positionFunc(collectionID, collectionName, position.ChannelName, kd) - } -} - -func (c *CDCWriterTemplate) checkBufferSize() { - if c.currentBufferSize >= c.bufferConfig.Size { - c.clearBufferFunc() - } -} - -func (c *CDCWriterTemplate) clearBufferFunc() { - // no copy, is a shallow copy - c.bufferDataChan <- c.bufferData[:] - c.bufferData = []lo.Tuple2[*model.CDCData, WriteCallback]{} - c.currentBufferSize = 0 -} - -func (c *CDCWriterTemplate) isSupportType(fieldType entity.FieldType) bool { - return fieldType == entity.FieldTypeBool || - fieldType == entity.FieldTypeInt8 || - fieldType == entity.FieldTypeInt16 || - fieldType == entity.FieldTypeInt32 || - fieldType == entity.FieldTypeInt64 || - fieldType == entity.FieldTypeFloat || - fieldType == entity.FieldTypeDouble || - fieldType == entity.FieldTypeString || - fieldType == entity.FieldTypeVarChar || - fieldType == entity.FieldTypeBinaryVector || - fieldType == entity.FieldTypeFloatVector -} - -func (c *CDCWriterTemplate) preCombineColumn(a []entity.Column, b []entity.Column) error { - for i := range a { - if a[i].Type() != b[i].Type() || !c.isSupportType(b[i].Type()) { - log.Warn("fail to combine the column", - zap.Any("a", a[i].Type()), zap.Any("b", b[i].Type())) - return errors.New("fail to combine the column") - } - } - return nil -} - -// combineColumn the b will be added to a. 
before execute the method, MUST execute the preCombineColumn -func (c *CDCWriterTemplate) combineColumn(a []entity.Column, b []entity.Column) { - for i := range a { - var values []interface{} - switch columnValue := b[i].(type) { - case *entity.ColumnBool: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnInt8: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnInt16: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnInt32: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnInt64: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnFloat: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnDouble: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnString: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnVarChar: - for _, varchar := range columnValue.Data() { - values = append(values, varchar) - } - case *entity.ColumnBinaryVector: - for _, id := range columnValue.Data() { - values = append(values, id) - } - case *entity.ColumnFloatVector: - for _, id := range columnValue.Data() { - values = append(values, id) - } - default: - log.Panic("not support column type", zap.Any("value", columnValue)) - } - for _, value := range values { - _ = a[i].AppendValue(value) - } - } -} +// import ( +// "context" +// "encoding/json" +// "fmt" +// "math/rand" +// "sync" +// "time" +// +// "github.com/cockroachdb/errors" +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +// "github.com/milvus-io/milvus-sdk-go/v2/entity" +// "github.com/milvus-io/milvus/pkg/mq/msgstream" +// "github.com/samber/lo" +// "go.uber.org/zap" +// +// "github.com/zilliztech/milvus-cdc/core/config" +// "github.com/zilliztech/milvus-cdc/core/model" +// "github.com/zilliztech/milvus-cdc/core/util" +// ) +// +// var log = util.Log +// +// type BufferConfig struct { +// Period time.Duration +// Size int64 +// } +// +// var DefaultBufferConfig = BufferConfig{ +// Period: 1 * time.Minute, +// Size: 1024 * 1024, +// } +// +// var NoBufferConfig = BufferConfig{ +// Period: 0, +// Size: -1, +// } +// +// type CDCWriterTemplate struct { +// DefaultWriter +// +// handler CDCDataHandler +// errProtect *ErrorProtect +// funcMap map[msgstream.MsgType]func(context.Context, *model.CDCData, WriteCallback) +// +// bufferConfig BufferConfig +// bufferLock sync.Mutex +// currentBufferSize int64 +// bufferOps []BufferOp +// bufferUpdatePositionFunc NotifyCollectionPositionChangeFunc +// bufferOpsChan chan []BufferOp +// +// bufferData []lo.Tuple2[*model.CDCData, WriteCallback] +// bufferDataChan chan []lo.Tuple2[*model.CDCData, WriteCallback] +// } +// +// // NewCDCWriterTemplate options must include HandlerOption +// func NewCDCWriterTemplate(options ...config.Option[*CDCWriterTemplate]) CDCWriter { +// c := &CDCWriterTemplate{ +// bufferConfig: DefaultBufferConfig, +// errProtect: FastFail(), +// } +// for _, option := range options { +// option.Apply(c) +// } +// c.funcMap = map[msgstream.MsgType]func(context.Context, *model.CDCData, WriteCallback){ +// commonpb.MsgType_CreateCollection: c.handleCreateCollection, +// commonpb.MsgType_DropCollection: c.handleDropCollection, +// commonpb.MsgType_Insert: c.handleInsert, +// 
commonpb.MsgType_Delete: c.handleDelete, +// commonpb.MsgType_CreatePartition: c.handleCreatePartition, +// commonpb.MsgType_DropPartition: c.handleDropPartition, +// commonpb.MsgType_CreateIndex: c.handleRPCRequest, +// commonpb.MsgType_DropIndex: c.handleRPCRequest, +// commonpb.MsgType_LoadCollection: c.handleRPCRequest, +// commonpb.MsgType_ReleaseCollection: c.handleRPCRequest, +// commonpb.MsgType_CreateDatabase: c.handleRPCRequest, +// commonpb.MsgType_DropDatabase: c.handleRPCRequest, +// } +// c.initBuffer() +// c.periodFlush() +// return c +// } +// +// func (c *CDCWriterTemplate) initBuffer() { +// c.bufferDataChan = make(chan []lo.Tuple2[*model.CDCData, WriteCallback]) +// +// // execute buffer ops +// go func() { +// for { +// select { +// case <-c.errProtect.Chan(): +// log.Warn("the error protection is triggered", zap.String("protect", c.errProtect.Info())) +// return +// default: +// } +// +// latestPositions := make(map[int64]map[string]*commonpb.KeyDataPair) +// collectionNames := make(map[int64]string) +// positionFunc := NotifyCollectionPositionChangeFunc(func(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) { +// if position == nil { +// return +// } +// collectionNames[collectionID] = collectionName +// collectionPositions, ok := latestPositions[collectionID] +// if !ok { +// collectionPositions = make(map[string]*commonpb.KeyDataPair) +// latestPositions[collectionID] = collectionPositions +// } +// collectionPositions[pChannelName] = position +// }) +// +// bufferData := <-c.bufferDataChan +// combineDataMap := make(map[string][]*CombineData) +// c.combineDataFunc(bufferData, combineDataMap, positionFunc) +// executeSuccesses := func(successes []func()) { +// for _, success := range successes { +// success() +// } +// } +// executeFails := func(fails []func(err error), err error) { +// for _, fail := range fails { +// fail(err) +// } +// } +// +// ctx := context.Background() +// for _, combineDatas := range combineDataMap { +// for _, combineData := range combineDatas { +// var err error +// switch p := combineData.param.(type) { +// case *InsertParam: +// err = c.handler.Insert(ctx, p) +// case *DeleteParam: +// err = c.handler.Delete(ctx, p) +// case *DropCollectionParam: +// err = c.handler.DropCollection(ctx, p) +// case *DropPartitionParam: +// err = c.handler.DropPartition(ctx, p) +// case *CreateIndexParam: +// err = c.handler.CreateIndex(ctx, p) +// case *DropIndexParam: +// err = c.handler.DropIndex(ctx, p) +// case *LoadCollectionParam: +// err = c.handler.LoadCollection(ctx, p) +// case *ReleaseCollectionParam: +// err = c.handler.ReleaseCollection(ctx, p) +// case *CreateDataBaseParam: +// err = c.handler.CreateDatabase(ctx, p) +// case *DropDataBaseParam: +// err = c.handler.DropDatabase(ctx, p) +// default: +// log.Warn("invalid param", zap.Any("data", combineData)) +// continue +// } +// if err != nil { +// executeFails(combineData.fails, err) +// continue +// } +// executeSuccesses(combineData.successes) +// } +// } +// +// if c.bufferUpdatePositionFunc != nil { +// for collectionID, collectionPositions := range latestPositions { +// for pChannelName, position := range collectionPositions { +// c.bufferUpdatePositionFunc(collectionID, collectionNames[collectionID], pChannelName, position) +// } +// } +// } +// } +// }() +// } +// +// type CombineData struct { +// param any +// fails []func(err error) +// successes []func() +// } +// +// func (c *CDCWriterTemplate) combineDataFunc(dataArr 
[]lo.Tuple2[*model.CDCData, WriteCallback], +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc) { +// +// for _, tuple := range dataArr { +// data := tuple.A +// callback := tuple.B +// switch msg := data.Msg.(type) { +// case *msgstream.InsertMsg: +// c.handleInsertBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.DeleteMsg: +// c.handleDeleteBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.DropCollectionMsg: +// c.handleDropCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.DropPartitionMsg: +// c.handleDropPartitionBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.CreateIndexMsg: +// c.handleCreateIndexBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.DropIndexMsg: +// c.handleDropIndexBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.LoadCollectionMsg: +// c.handleLoadCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.ReleaseCollectionMsg: +// c.handleReleaseCollectionBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.CreateDatabaseMsg: +// c.handleCreateDatabaseBuffer(msg, data, callback, combineDataMap, positionFunc) +// case *msgstream.DropDatabaseMsg: +// c.handleDropDatabaseBuffer(msg, data, callback, combineDataMap, positionFunc) +// } +// } +// } +// +// func (c *CDCWriterTemplate) generateBufferKey(a string, b string) string { +// return a + ":" + b +// } +// +// func (c *CDCWriterTemplate) handleInsertBuffer(msg *msgstream.InsertMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// +// collectionName := msg.CollectionName +// partitionName := msg.PartitionName +// dataKey := c.generateBufferKey(collectionName, partitionName) +// // construct columns +// var columns []entity.Column +// for _, fieldData := range msg.FieldsData { +// if column, err := entity.FieldDataColumn(fieldData, 0, -1); err == nil { +// columns = append(columns, column) +// } else { +// column, err := entity.FieldDataVector(fieldData) +// if err != nil { +// c.fail("fail to parse the data", err, data, callback) +// return +// } +// columns = append(columns, column) +// } +// } +// // new combine data for convenient usage below +// newCombineData := &CombineData{ +// param: &InsertParam{ +// CollectionName: collectionName, +// PartitionName: partitionName, +// Columns: columns, +// }, +// successes: []func(){ +// func() { +// c.success(msg.CollectionID, collectionName, len(msg.RowIDs), data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to insert the data", err, data, callback) +// }, +// }, +// } +// combineDataArr, ok := combineDataMap[dataKey] +// // check whether the combineDataMap contains the key, if not, add the data +// if !ok { +// combineDataMap[dataKey] = []*CombineData{ +// newCombineData, +// } +// return +// } +// lastCombineData := combineDataArr[len(combineDataArr)-1] +// insertParam, ok := lastCombineData.param.(*InsertParam) +// // check whether the last data is insert, if not, add the data to array +// if !ok { +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// return +// } +// // combine the data +// if err := c.preCombineColumn(insertParam.Columns, columns); err != nil { +// c.fail("fail to combine the 
data", err, data, callback) +// return +// } +// c.combineColumn(insertParam.Columns, columns) +// lastCombineData.successes = append(lastCombineData.successes, newCombineData.successes...) +// lastCombineData.fails = append(lastCombineData.fails, newCombineData.fails...) +// } +// +// func (c *CDCWriterTemplate) handleDeleteBuffer(msg *msgstream.DeleteMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// collectionName := msg.CollectionName +// partitionName := msg.PartitionName +// dataKey := c.generateBufferKey(collectionName, partitionName) +// // get the id column +// column, err := entity.IDColumns(msg.PrimaryKeys, 0, -1) +// if err != nil { +// c.fail("fail to get the id columns", err, data, callback) +// return +// } +// newCombineData := &CombineData{ +// param: &DeleteParam{ +// CollectionName: collectionName, +// PartitionName: partitionName, +// Column: column, +// }, +// successes: []func(){ +// func() { +// c.success(msg.CollectionID, collectionName, int(msg.NumRows), data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to delete the column", err, data, callback) +// }, +// }, +// } +// combineDataArr, ok := combineDataMap[dataKey] +// // check whether the combineDataMap contains the key, if not, add the data +// if !ok { +// combineDataMap[dataKey] = []*CombineData{ +// newCombineData, +// } +// return +// } +// lastCombineData := combineDataArr[len(combineDataArr)-1] +// deleteParam, ok := lastCombineData.param.(*DeleteParam) +// // check whether the last data is insert, if not, add the data to array +// if !ok { +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// return +// } +// // combine the data +// var values []interface{} +// switch columnValue := column.(type) { +// case *entity.ColumnInt64: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnVarChar: +// for _, varchar := range columnValue.Data() { +// values = append(values, varchar) +// } +// default: +// c.fail("fail to combine the delete data", err, data, callback) +// } +// for _, value := range values { +// err = deleteParam.Column.AppendValue(value) +// if err != nil { +// c.fail("fail to combine the delete data", err, data, callback) +// return +// } +// } +// lastCombineData.successes = append(lastCombineData.successes, newCombineData.successes...) +// lastCombineData.fails = append(lastCombineData.fails, newCombineData.fails...) 
+// } +// +// func (c *CDCWriterTemplate) handleDropCollectionBuffer(msg *msgstream.DropCollectionMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// collectionName := msg.CollectionName +// dataKey := c.generateBufferKey(collectionName, "") +// newCombineData := &CombineData{ +// param: &DropCollectionParam{ +// CollectionName: collectionName, +// }, +// successes: []func(){ +// func() { +// channelInfos := make(map[string]CallbackChannelInfo) +// collectChannelInfo := func(dropCollectionMsg *msgstream.DropCollectionMsg) { +// position := dropCollectionMsg.Position() +// kd := &commonpb.KeyDataPair{ +// Key: position.ChannelName, +// Data: position.MsgID, +// } +// channelInfos[position.ChannelName] = CallbackChannelInfo{ +// Position: kd, +// Ts: dropCollectionMsg.EndTs(), +// } +// } +// collectChannelInfo(msg) +// if msgsValue := data.Extra[model.DropCollectionMsgsKey]; msgsValue != nil { +// msgs := msgsValue.([]*msgstream.DropCollectionMsg) +// for _, tsMsg := range msgs { +// collectChannelInfo(tsMsg) +// } +// } +// +// callback.OnSuccess(msg.CollectionID, channelInfos) +// if positionFunc != nil { +// for _, info := range channelInfos { +// positionFunc(msg.CollectionID, msg.CollectionName, info.Position.Key, info.Position) +// } +// } +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to drop collection", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleDropPartitionBuffer(msg *msgstream.DropPartitionMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// collectionName := msg.CollectionName +// partitionName := msg.PartitionName +// dataKey := c.generateBufferKey(collectionName, partitionName) +// newCombineData := &CombineData{ +// param: &DropPartitionParam{ +// CollectionName: collectionName, +// PartitionName: partitionName, +// }, +// successes: []func(){ +// func() { +// channelInfos := make(map[string]CallbackChannelInfo) +// collectChannelInfo := func(dropPartitionMsg *msgstream.DropPartitionMsg) { +// position := dropPartitionMsg.Position() +// kd := &commonpb.KeyDataPair{ +// Key: position.ChannelName, +// Data: position.MsgID, +// } +// channelInfos[position.ChannelName] = CallbackChannelInfo{ +// Position: kd, +// Ts: dropPartitionMsg.EndTs(), +// } +// } +// collectChannelInfo(msg) +// if msgsValue := data.Extra[model.DropPartitionMsgsKey]; msgsValue != nil { +// msgs := msgsValue.([]*msgstream.DropPartitionMsg) +// for _, tsMsg := range msgs { +// collectChannelInfo(tsMsg) +// } +// } +// +// callback.OnSuccess(msg.CollectionID, channelInfos) +// if positionFunc != nil { +// for _, info := range channelInfos { +// positionFunc(msg.CollectionID, msg.CollectionName, info.Position.Key, info.Position) +// } +// } +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to drop collection", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleCreateIndexBuffer(msg *msgstream.CreateIndexMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := 
fmt.Sprintf("create_index_%s_%s_%d", msg.CollectionName, msg.IndexName, rand.Int()) +// newCombineData := &CombineData{ +// param: &CreateIndexParam{ +// CreateIndexRequest: msg.CreateIndexRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to create index", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleDropIndexBuffer(msg *msgstream.DropIndexMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := fmt.Sprintf("drop_index_%s_%s_%d", msg.CollectionName, msg.IndexName, rand.Int()) +// newCombineData := &CombineData{ +// param: &DropIndexParam{ +// DropIndexRequest: msg.DropIndexRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to drop index", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleLoadCollectionBuffer(msg *msgstream.LoadCollectionMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := fmt.Sprintf("load_collection_%s_%d", msg.CollectionName, rand.Int()) +// newCombineData := &CombineData{ +// param: &LoadCollectionParam{ +// LoadCollectionRequest: msg.LoadCollectionRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to load collection", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleReleaseCollectionBuffer(msg *msgstream.ReleaseCollectionMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := fmt.Sprintf("release_collection_%s_%d", msg.CollectionName, rand.Int()) +// newCombineData := &CombineData{ +// param: &ReleaseCollectionParam{ +// ReleaseCollectionRequest: msg.ReleaseCollectionRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to release collection", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleCreateDatabaseBuffer(msg *msgstream.CreateDatabaseMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := fmt.Sprintf("create_database_%s_%d", msg.DbName, rand.Int()) +// newCombineData := &CombineData{ +// param: &CreateDataBaseParam{ +// CreateDatabaseRequest: msg.CreateDatabaseRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to 
create database", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) handleDropDatabaseBuffer(msg *msgstream.DropDatabaseMsg, +// data *model.CDCData, callback WriteCallback, +// combineDataMap map[string][]*CombineData, +// positionFunc NotifyCollectionPositionChangeFunc, +// ) { +// dataKey := fmt.Sprintf("drop_database_%s_%d", msg.DbName, rand.Int()) +// newCombineData := &CombineData{ +// param: &DropDataBaseParam{ +// DropDatabaseRequest: msg.DropDatabaseRequest, +// }, +// successes: []func(){ +// func() { +// c.rpcRequestSuccess(msg, data, callback, positionFunc) +// }, +// }, +// fails: []func(err error){ +// func(err error) { +// c.fail("fail to drop database", err, data, callback) +// }, +// }, +// } +// combineDataMap[dataKey] = append(combineDataMap[dataKey], newCombineData) +// } +// +// func (c *CDCWriterTemplate) rpcRequestSuccess(msg msgstream.TsMsg, data *model.CDCData, callback WriteCallback, positionFunc NotifyCollectionPositionChangeFunc) { +// channelInfos := make(map[string]CallbackChannelInfo) +// position := msg.Position() +// info := CallbackChannelInfo{ +// Position: &commonpb.KeyDataPair{ +// Key: position.ChannelName, +// Data: position.MsgID, +// }, +// Ts: msg.EndTs(), +// } +// channelInfos[position.ChannelName] = info +// collectionID := util.RPCRequestCollectionID +// collectionName := util.RPCRequestCollectionName +// if value, ok := data.Extra[model.CollectionIDKey]; ok { +// collectionID = value.(int64) +// } +// if value, ok := data.Extra[model.CollectionNameKey]; ok { +// collectionName = value.(string) +// } +// callback.OnSuccess(collectionID, channelInfos) +// if positionFunc != nil { +// positionFunc(collectionID, collectionName, info.Position.Key, info.Position) +// } +// } +// +// func (c *CDCWriterTemplate) periodFlush() { +// go func() { +// if c.bufferConfig.Period <= 0 { +// return +// } +// ticker := time.NewTicker(c.bufferConfig.Period) +// for { +// <-ticker.C +// c.Flush(context.Background()) +// } +// }() +// } +// +// func (c *CDCWriterTemplate) Write(ctx context.Context, data *model.CDCData, callback WriteCallback) error { +// select { +// case <-c.errProtect.Chan(): +// log.Warn("the error protection is triggered", zap.String("protect", c.errProtect.Info())) +// return errors.New("the error protection is triggered") +// default: +// } +// +// handleFunc, ok := c.funcMap[data.Msg.Type()] +// if !ok { +// // don't execute the fail callback, because the future messages will be ignored and don't trigger the error protection +// log.Warn("not support message type", zap.Any("data", data)) +// return fmt.Errorf("not support message type, type: %s", data.Msg.Type().String()) +// } +// handleFunc(ctx, data, callback) +// return nil +// } +// +// func (c *CDCWriterTemplate) Flush(context context.Context) { +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.clearBufferFunc() +// } +// +// func (c *CDCWriterTemplate) handleCreateCollection(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// msg := data.Msg.(*msgstream.CreateCollectionMsg) +// schema := &schemapb.CollectionSchema{} +// err := json.Unmarshal(msg.Schema, schema) +// if err != nil { +// c.fail("fail to unmarshal the collection schema", err, data, callback) +// return +// } +// var shardNum int32 +// if value, ok := data.Extra[model.ShardNumKey]; ok { +// shardNum = value.(int32) +// } +// level := commonpb.ConsistencyLevel_Strong +// if value, 
ok := data.Extra[model.ConsistencyLevelKey]; ok { +// level = value.(commonpb.ConsistencyLevel) +// } +// var properties []*commonpb.KeyValuePair +// if value, ok := data.Extra[model.CollectionPropertiesKey]; ok { +// properties = value.([]*commonpb.KeyValuePair) +// } +// +// entitySchema := &entity.Schema{} +// entitySchema = entitySchema.ReadProto(schema) +// err = c.handler.CreateCollection(ctx, &CreateCollectionParam{ +// Schema: entitySchema, +// ShardsNum: shardNum, +// ConsistencyLevel: level, +// Properties: properties, +// }) +// if err != nil { +// c.fail("fail to create the collection", err, data, callback) +// return +// } +// callback.OnSuccess(msg.CollectionID, nil) +// } +// +// func (c *CDCWriterTemplate) handleDropCollection(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.bufferData = append(c.bufferData, lo.T2(data, callback)) +// c.clearBufferFunc() +// } +// +// func (c *CDCWriterTemplate) handleInsert(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// msg := data.Msg.(*msgstream.InsertMsg) +// totalSize := SizeOfInsertMsg(msg) +// if totalSize < 0 { +// c.fail("fail to get the data size", errors.New("invalid column type"), data, callback) +// return +// } +// +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.currentBufferSize += totalSize +// c.bufferData = append(c.bufferData, lo.T2(data, callback)) +// c.checkBufferSize() +// } +// +// func (c *CDCWriterTemplate) handleDelete(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// msg := data.Msg.(*msgstream.DeleteMsg) +// totalSize := SizeOfDeleteMsg(msg) +// +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.currentBufferSize += totalSize +// c.bufferData = append(c.bufferData, lo.T2(data, callback)) +// c.checkBufferSize() +// } +// +// func (c *CDCWriterTemplate) handleCreatePartition(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// msg := data.Msg.(*msgstream.CreatePartitionMsg) +// err := c.handler.CreatePartition(ctx, &CreatePartitionParam{ +// CollectionName: msg.CollectionName, +// PartitionName: msg.PartitionName, +// }) +// if err != nil { +// c.fail("fail to create the partition", err, data, callback) +// return +// } +// callback.OnSuccess(msg.CollectionID, nil) +// } +// +// func (c *CDCWriterTemplate) handleDropPartition(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.bufferData = append(c.bufferData, lo.T2(data, callback)) +// c.clearBufferFunc() +// } +// +// func (c *CDCWriterTemplate) handleRPCRequest(ctx context.Context, data *model.CDCData, callback WriteCallback) { +// c.bufferLock.Lock() +// defer c.bufferLock.Unlock() +// c.bufferData = append(c.bufferData, lo.T2(data, callback)) +// c.clearBufferFunc() +// } +// +// func (c *CDCWriterTemplate) collectionName(data *model.CDCData) string { +// f, ok := data.Msg.(interface{ GetCollectionName() string }) +// if ok { +// return f.GetCollectionName() +// } +// return "" +// } +// +// func (c *CDCWriterTemplate) partitionName(data *model.CDCData) string { +// f, ok := data.Msg.(interface{ GetPartitionName() string }) +// if ok { +// return f.GetPartitionName() +// } +// return "" +// } +// +// func (c *CDCWriterTemplate) fail(msg string, err error, data *model.CDCData, +// callback WriteCallback, field ...zap.Field) { +// +// log.Warn(msg, append(field, +// zap.String("collection_name", 
c.collectionName(data)), +// zap.String("partition_name", c.partitionName(data)), +// zap.Error(err))...) +// callback.OnFail(data, errors.WithMessage(err, msg)) +// c.errProtect.Inc() +// } +// +// func (c *CDCWriterTemplate) success(collectionID int64, collectionName string, rowCount int, +// data *model.CDCData, callback WriteCallback, positionFunc NotifyCollectionPositionChangeFunc) { +// position := data.Msg.Position() +// kd := &commonpb.KeyDataPair{ +// Key: position.ChannelName, +// Data: position.MsgID, +// } +// callback.OnSuccess(collectionID, map[string]CallbackChannelInfo{ +// position.ChannelName: { +// Position: kd, +// MsgType: data.Msg.Type(), +// MsgRowCount: rowCount, +// Ts: data.Msg.EndTs(), +// }, +// }) +// if positionFunc != nil { +// positionFunc(collectionID, collectionName, position.ChannelName, kd) +// } +// } +// +// func (c *CDCWriterTemplate) checkBufferSize() { +// if c.currentBufferSize >= c.bufferConfig.Size { +// c.clearBufferFunc() +// } +// } +// +// func (c *CDCWriterTemplate) clearBufferFunc() { +// // no copy, is a shallow copy +// c.bufferDataChan <- c.bufferData[:] +// c.bufferData = []lo.Tuple2[*model.CDCData, WriteCallback]{} +// c.currentBufferSize = 0 +// } +// +// func (c *CDCWriterTemplate) isSupportType(fieldType entity.FieldType) bool { +// return fieldType == entity.FieldTypeBool || +// fieldType == entity.FieldTypeInt8 || +// fieldType == entity.FieldTypeInt16 || +// fieldType == entity.FieldTypeInt32 || +// fieldType == entity.FieldTypeInt64 || +// fieldType == entity.FieldTypeFloat || +// fieldType == entity.FieldTypeDouble || +// fieldType == entity.FieldTypeString || +// fieldType == entity.FieldTypeVarChar || +// fieldType == entity.FieldTypeBinaryVector || +// fieldType == entity.FieldTypeFloatVector +// } +// +// func (c *CDCWriterTemplate) preCombineColumn(a []entity.Column, b []entity.Column) error { +// for i := range a { +// if a[i].Type() != b[i].Type() || !c.isSupportType(b[i].Type()) { +// log.Warn("fail to combine the column", +// zap.Any("a", a[i].Type()), zap.Any("b", b[i].Type())) +// return errors.New("fail to combine the column") +// } +// } +// return nil +// } +// +// // combineColumn the b will be added to a. 
before execute the method, MUST execute the preCombineColumn +// func (c *CDCWriterTemplate) combineColumn(a []entity.Column, b []entity.Column) { +// for i := range a { +// var values []interface{} +// switch columnValue := b[i].(type) { +// case *entity.ColumnBool: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnInt8: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnInt16: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnInt32: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnInt64: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnFloat: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnDouble: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnString: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnVarChar: +// for _, varchar := range columnValue.Data() { +// values = append(values, varchar) +// } +// case *entity.ColumnBinaryVector: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// case *entity.ColumnFloatVector: +// for _, id := range columnValue.Data() { +// values = append(values, id) +// } +// default: +// log.Panic("not support column type", zap.Any("value", columnValue)) +// } +// for _, value := range values { +// _ = a[i].AppendValue(value) +// } +// } +// } diff --git a/core/writer/writer_template_test.go b/core/writer/writer_template_test.go index f351c3b9..2dea29c6 100644 --- a/core/writer/writer_template_test.go +++ b/core/writer/writer_template_test.go @@ -16,532 +16,532 @@ package writer_test -import ( - "context" - "sync" - "testing" - "time" - - "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - - "github.com/cockroachdb/errors" - "github.com/goccy/go-json" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus-sdk-go/v2/entity" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/zilliztech/milvus-cdc/core/mocks" - "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/writer" -) - -type AssertPosition struct { - lock sync.Mutex - positions map[int64]map[string]*commonpb.KeyDataPair - saveNum int -} - -func NewAssertPosition() *AssertPosition { - return &AssertPosition{ - positions: map[int64]map[string]*commonpb.KeyDataPair{}, - } -} - -func (a *AssertPosition) savePosition(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) { - a.lock.Lock() - defer a.lock.Unlock() - a.saveNum++ - if _, ok := a.positions[collectionID]; !ok { - a.positions[collectionID] = map[string]*commonpb.KeyDataPair{} - } - a.positions[collectionID][pChannelName] = position -} - -func (a *AssertPosition) clear() { - a.lock.Lock() - defer a.lock.Unlock() - a.positions = map[int64]map[string]*commonpb.KeyDataPair{} - a.saveNum = 0 -} - -func TestWriterTemplateCreateCollection(t *testing.T) { - mockMilvusFactory := mocks.NewMilvusClientFactory(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) - writerCallback 
:= mocks.NewWriteCallback(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - - handler, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) - assert.NoError(t, err) - - cdcWriter := writer.NewCDCWriterTemplate( - writer.HandlerOption(handler), - writer.BufferOption(10*time.Second, 1024, nil), - writer.ErrorProtectOption(1, time.Second), - ) - - t.Run("msg type error", func(t *testing.T) { - err = cdcWriter.Write(context.Background(), &model.CDCData{ - Msg: &msgstream.CreateCollectionMsg{ - CreateCollectionRequest: msgpb.CreateCollectionRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_TimeTick, - }, - }, - }, - }, writerCallback) - assert.Error(t, err) - }) - - var shardNum int32 = 5 - level := commonpb.ConsistencyLevel_Session - kv := &commonpb.KeyValuePair{Key: "foo", Value: "111"} - - pbSchema := &schemapb.CollectionSchema{ - Name: "coll", - Description: "coll-des", - AutoID: true, - Fields: []*schemapb.FieldSchema{ - { - FieldID: 100, - Name: "first", - Description: "first-desc", - IsPrimaryKey: true, - DataType: schemapb.DataType_VarChar, - }, - { - FieldID: 101, - Name: "second", - Description: "second-desc", - DataType: schemapb.DataType_Double, - }, - }, - } - pbSchemaByte, _ := json.Marshal(pbSchema) - - data := &model.CDCData{ - Msg: &msgstream.CreateCollectionMsg{ - CreateCollectionRequest: msgpb.CreateCollectionRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_CreateCollection, - }, - CollectionName: "coll", - CollectionID: 1001, - Schema: pbSchemaByte, - }, - }, - Extra: map[string]any{ - model.ShardNumKey: shardNum, - model.ConsistencyLevelKey: level, - model.CollectionPropertiesKey: []*commonpb.KeyValuePair{kv}, - }, - } - - t.Run("success", func(t *testing.T) { - createCall := mockMilvusClient.On("CreateCollection", mock.Anything, mock.Anything, shardNum, mock.Anything, mock.Anything). - Run(func(args mock.Arguments) { - assert.Len(t, args, 5) - - entitySchema := args[1].(*entity.Schema) - assert.Equal(t, entitySchema.CollectionName, "coll") - assert.Equal(t, entitySchema.Description, "coll-des") - assert.True(t, entitySchema.AutoID) - assert.Len(t, entitySchema.Fields, 2) - assert.EqualValues(t, 100, entitySchema.Fields[0].ID) - assert.EqualValues(t, 101, entitySchema.Fields[1].ID) - }). - Return(nil) - defer createCall.Unset() - successCallbackCall := writerCallback.On("OnSuccess", int64(1001), mock.Anything).Return() - defer successCallbackCall.Unset() - err = cdcWriter.Write(context.Background(), data, writerCallback) - assert.NoError(t, err) - }) - - t.Run("create error", func(t *testing.T) { - createCall := mockMilvusClient.On("CreateCollection", mock.Anything, mock.Anything, shardNum, mock.Anything, mock.Anything). 
- Return(errors.New("create error")) - defer createCall.Unset() - failCallbackCall := writerCallback.On("OnFail", data, mock.Anything).Return() - defer failCallbackCall.Unset() - err = cdcWriter.Write(context.Background(), data, writerCallback) - assert.NoError(t, err) - - // trigger error protect - err = cdcWriter.Write(context.Background(), data, writerCallback) - assert.Error(t, err) - }) -} - -func TestWriterTemplateInsertDeleteDrop(t *testing.T) { - mockMilvusFactory := mocks.NewMilvusClientFactory(t) - factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) - - //assertPosition := NewAssertPosition() - newWriter := func(assertPosition *AssertPosition) writer.CDCWriter { - handler, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) - assert.NoError(t, err) - return writer.NewCDCWriterTemplate( - writer.HandlerOption(handler), - writer.BufferOption(5*time.Second, 10*1024*1024, assertPosition.savePosition), - writer.ErrorProtectOption(100, time.Second), - ) - } - - // Binary vector - // Dimension of binary vector is 32 - // size := 4, = 32 / 8 - binaryVector := []byte{255, 255, 255, 0} - generateInsertData := func(collectionID int64, collectionName string, channelName string, partitionName string, msgID string) *model.CDCData { - return &model.CDCData{ - Msg: &msgstream.InsertMsg{ - InsertRequest: msgpb.InsertRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_Insert, - }, - CollectionName: collectionName, - CollectionID: collectionID, - PartitionName: partitionName, - FieldsData: []*schemapb.FieldData{ - { - Type: schemapb.DataType_Bool, - FieldName: "ok", - FieldId: 101, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: []bool{true}, - }, - }, - }, - }, - }, - { - Type: schemapb.DataType_String, - FieldName: "ok", - FieldId: 102, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: []string{"true"}, - }, - }, - }, - }, - }, - { - Type: schemapb.DataType_BinaryVector, - FieldName: "ok", - FieldId: 102, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: 32, - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: binaryVector, - }, - }, - }, - }, - }, - }, - BaseMsg: msgstream.BaseMsg{ - EndTimestamp: 1000, - MsgPosition: &msgstream.MsgPosition{ - ChannelName: channelName, - MsgID: []byte(msgID), - }, - }, - }, - } - } - - generateDeleteData := func(collectionID int64, collectionName string, channelName string, partitionName string, msgID string, ids []int64) *model.CDCData { - return &model.CDCData{ - Msg: &msgstream.DeleteMsg{ - DeleteRequest: msgpb.DeleteRequest{ - Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_Delete, - }, - CollectionName: collectionName, - CollectionID: collectionID, - PartitionName: partitionName, - PrimaryKeys: &schemapb.IDs{ - IdField: &schemapb.IDs_IntId{ - IntId: &schemapb.LongArray{ - Data: ids, - }, - }, - }, - }, - BaseMsg: msgstream.BaseMsg{ - EndTimestamp: 1000, - MsgPosition: &msgstream.MsgPosition{ - ChannelName: channelName, - MsgID: []byte(msgID), - }, - }, - }, - } - } - - generateDropData := func(collectionID int64, collectionName string, channelName, msgID, channelName2, msgID2 string) *model.CDCData { - return &model.CDCData{ - Msg: &msgstream.DropCollectionMsg{ - DropCollectionRequest: msgpb.DropCollectionRequest{ - 
Base: &commonpb.MsgBase{ - MsgType: commonpb.MsgType_DropCollection, - }, - CollectionName: collectionName, - CollectionID: collectionID, - }, - BaseMsg: msgstream.BaseMsg{ - EndTimestamp: 1000, - MsgPosition: &msgstream.MsgPosition{ - ChannelName: channelName, - MsgID: []byte(msgID), - }, - }, - }, - Extra: map[string]any{ - model.DropCollectionMsgsKey: []*msgstream.DropCollectionMsg{ - { - BaseMsg: msgstream.BaseMsg{ - EndTimestamp: 2000, - MsgPosition: &msgpb.MsgPosition{ - ChannelName: channelName2, - MsgID: []byte(msgID2), - }, - }, - }, - }, - }, - } - } - - t.Run("insert success", func(t *testing.T) { - assertPosition := NewAssertPosition() - defer assertPosition.clear() - writerCallback := mocks.NewWriteCallback(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - - successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() - defer successCallbackCall.Unset() - insertCall := mockMilvusClient.On("Insert", mock.Anything, mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { - collectionName := args[1].(string) - if collectionName == "coll" { - boolColumn := args[3].(*entity.ColumnBool) - assert.Len(t, boolColumn.Data(), 3) - stringColumn := args[4].(*entity.ColumnString) - assert.Len(t, stringColumn.Data(), 3) - byteVectorColumn := args[5].(*entity.ColumnBinaryVector) - assert.Len(t, byteVectorColumn.Data(), 3) - } - }).Return(nil, nil) - defer insertCall.Unset() - cdcWriter := newWriter(assertPosition) - - err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "a"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateInsertData(int64(1002), "coll2", "a", "part", "c"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "d"), writerCallback) - assert.NoError(t, err) - cdcWriter.Flush(context.Background()) - time.Sleep(2 * time.Second) - writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) // 3 - writerCallback.AssertCalled(t, "OnSuccess", int64(1002), mock.Anything) // 1 - writerCallback.AssertNumberOfCalls(t, "OnSuccess", 4) - mockMilvusClient.AssertNumberOfCalls(t, "Insert", 2) - assertPosition.lock.Lock() - defer assertPosition.lock.Unlock() - assert.Equal(t, 3, assertPosition.saveNum) - position := assertPosition.positions[int64(1001)]["a"] - assert.Equal(t, "a", position.Key) - assert.Equal(t, "d", string(position.Data)) - - position = assertPosition.positions[int64(1001)]["b"] - assert.Equal(t, "b", position.Key) - assert.Equal(t, "b", string(position.Data)) - - position = assertPosition.positions[int64(1002)]["a"] - assert.Equal(t, "a", position.Key) - assert.Equal(t, "c", string(position.Data)) - }) - - t.Run("delete success", func(t *testing.T) { - assertPosition := NewAssertPosition() - defer assertPosition.clear() - - writerCallback := mocks.NewWriteCallback(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - successCallbackCall := writerCallback.On("OnSuccess", 
mock.Anything, mock.Anything).Return() - defer successCallbackCall.Unset() - - deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Run(func(args mock.Arguments) { - collectionName := args[1].(string) - if collectionName == "col1" { - boolColumn := args[3].(*entity.ColumnInt64) - assert.Len(t, boolColumn.Data(), 9) - assert.Equal(t, []int64{1, 2, 3, 4, 5, 6, 10, 11, 12}, boolColumn.Data()) - } - }).Return(nil) - defer deleteCall.Unset() - cdcWriter := newWriter(assertPosition) - - err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "b", "part", "b", []int64{4, 5, 6}), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1002), "col2", "a", "part", "c", []int64{7, 8, 9}), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "d", []int64{10, 11, 12}), writerCallback) - assert.NoError(t, err) - cdcWriter.Flush(context.Background()) - time.Sleep(2 * time.Second) - writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) // 3 - writerCallback.AssertCalled(t, "OnSuccess", int64(1002), mock.Anything) // 1 - writerCallback.AssertNumberOfCalls(t, "OnSuccess", 4) - mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 2) - assertPosition.lock.Lock() - defer assertPosition.lock.Unlock() - assert.Equal(t, 3, assertPosition.saveNum) - position := assertPosition.positions[int64(1001)]["a"] - assert.Equal(t, "a", position.Key) - assert.Equal(t, "d", string(position.Data)) - - position = assertPosition.positions[int64(1001)]["b"] - assert.Equal(t, "b", position.Key) - assert.Equal(t, "b", string(position.Data)) - - position = assertPosition.positions[int64(1002)]["a"] - assert.Equal(t, "a", position.Key) - assert.Equal(t, "c", string(position.Data)) - }) - - t.Run("drop success", func(t *testing.T) { - assertPosition := NewAssertPosition() - defer assertPosition.clear() - - writerCallback := mocks.NewWriteCallback(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() - defer successCallbackCall.Unset() - insertCall := mockMilvusClient.On("Insert", mock.Anything, mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) - defer insertCall.Unset() - deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(nil) - defer deleteCall.Unset() - dropCall := mockMilvusClient.On("DropCollection", mock.Anything, mock.Anything).Return(nil) - defer dropCall.Unset() - cdcWriter := newWriter(assertPosition) - - err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "a"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "coll", "b", "part", "c", []int64{4, 5, 6}), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), 
generateInsertData(int64(1001), "coll", "a", "part", "d"), writerCallback) - assert.NoError(t, err) - err = cdcWriter.Write(context.Background(), generateDropData(int64(1001), "coll", "a", "e", "b", "f"), writerCallback) - assert.NoError(t, err) - cdcWriter.Flush(context.Background()) - - time.Sleep(2 * time.Second) - writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) - writerCallback.AssertNumberOfCalls(t, "OnSuccess", 5) - mockMilvusClient.AssertNumberOfCalls(t, "Insert", 2) - mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 1) - mockMilvusClient.AssertNumberOfCalls(t, "DropCollection", 1) - assertPosition.lock.Lock() - defer assertPosition.lock.Unlock() - assert.Equal(t, 2, assertPosition.saveNum) - position := assertPosition.positions[int64(1001)]["a"] - assert.Equal(t, "a", position.Key) - assert.Equal(t, "e", string(position.Data)) - - position = assertPosition.positions[int64(1001)]["b"] - assert.Equal(t, "b", position.Key) - assert.Equal(t, "f", string(position.Data)) - }) - - t.Run("flush", func(t *testing.T) { - assertPosition := NewAssertPosition() - defer assertPosition.clear() - - writerCallback := mocks.NewWriteCallback(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() - defer successCallbackCall.Unset() - deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(nil) - defer deleteCall.Unset() - - cdcWriter := newWriter(assertPosition) - err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) - assert.NoError(t, err) - - // wait the flush time - time.Sleep(7 * time.Second) - writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) - mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 1) - assertPosition.lock.Lock() - defer assertPosition.lock.Unlock() - assert.Equal(t, 1, assertPosition.saveNum) - }) - - t.Run("err", func(t *testing.T) { - assertPosition := NewAssertPosition() - defer assertPosition.clear() - - writerCallback := mocks.NewWriteCallback(t) - mockMilvusClient := mocks.NewMilvusClientAPI(t) - call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) - defer call.Unset() - successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() - defer successCallbackCall.Unset() - failCallbackCall := writerCallback.On("OnFail", mock.Anything, mock.Anything).Return() - defer failCallbackCall.Unset() - insertCall := mockMilvusClient.On("Insert", mock.Anything, mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) - defer insertCall.Unset() - deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(errors.New("delete error")) - defer deleteCall.Unset() - - handler, _ := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) - cdcWriter := writer.NewCDCWriterTemplate( - writer.HandlerOption(handler), - writer.BufferOption(5*time.Second, 10*1024*1024, assertPosition.savePosition), - writer.ErrorProtectOption(5, time.Second), - ) - - for i := 0; i < 3; i++ { - err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), 
"col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) - assert.NoError(t, err) - } - cdcWriter.Flush(context.Background()) - err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) - assert.NoError(t, err) - time.Sleep(2 * time.Second) - for i := 0; i < 6; i++ { - err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) - assert.NoError(t, err) - } - cdcWriter.Flush(context.Background()) - time.Sleep(time.Second) - err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) - assert.Error(t, err) - }) -} +// import ( +// "context" +// "sync" +// "testing" +// "time" +// +// "github.com/cockroachdb/errors" +// "github.com/goccy/go-json" +// "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" +// "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" +// "github.com/milvus-io/milvus-sdk-go/v2/entity" +// "github.com/milvus-io/milvus/pkg/mq/msgstream" +// "github.com/stretchr/testify/assert" +// "github.com/stretchr/testify/mock" +// +// "github.com/zilliztech/milvus-cdc/core/mocks" +// "github.com/zilliztech/milvus-cdc/core/model" +// "github.com/zilliztech/milvus-cdc/core/writer" +// ) +// +// type AssertPosition struct { +// lock sync.Mutex +// positions map[int64]map[string]*commonpb.KeyDataPair +// saveNum int +// } +// +// func NewAssertPosition() *AssertPosition { +// return &AssertPosition{ +// positions: map[int64]map[string]*commonpb.KeyDataPair{}, +// } +// } +// +// func (a *AssertPosition) savePosition(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) { +// a.lock.Lock() +// defer a.lock.Unlock() +// a.saveNum++ +// if _, ok := a.positions[collectionID]; !ok { +// a.positions[collectionID] = map[string]*commonpb.KeyDataPair{} +// } +// a.positions[collectionID][pChannelName] = position +// } +// +// func (a *AssertPosition) clear() { +// a.lock.Lock() +// defer a.lock.Unlock() +// a.positions = map[int64]map[string]*commonpb.KeyDataPair{} +// a.saveNum = 0 +// } +// +// func TestWriterTemplateCreateCollection(t *testing.T) { +// mockMilvusFactory := mocks.NewMilvusClientFactory(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) +// writerCallback := mocks.NewWriteCallback(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// handler, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) +// assert.NoError(t, err) +// +// cdcWriter := writer.NewCDCWriterTemplate( +// writer.HandlerOption(handler), +// writer.BufferOption(10*time.Second, 1024, nil), +// writer.ErrorProtectOption(1, time.Second), +// ) +// +// t.Run("msg type error", func(t *testing.T) { +// err = cdcWriter.Write(context.Background(), &model.CDCData{ +// Msg: &msgstream.CreateCollectionMsg{ +// CreateCollectionRequest: msgpb.CreateCollectionRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_TimeTick, +// }, +// }, +// }, +// }, writerCallback) +// assert.Error(t, err) +// }) +// +// var shardNum int32 = 5 +// level := commonpb.ConsistencyLevel_Session +// kv := &commonpb.KeyValuePair{Key: "foo", Value: "111"} +// +// pbSchema := 
&schemapb.CollectionSchema{ +// Name: "coll", +// Description: "coll-des", +// AutoID: true, +// Fields: []*schemapb.FieldSchema{ +// { +// FieldID: 100, +// Name: "first", +// Description: "first-desc", +// IsPrimaryKey: true, +// DataType: schemapb.DataType_VarChar, +// }, +// { +// FieldID: 101, +// Name: "second", +// Description: "second-desc", +// DataType: schemapb.DataType_Double, +// }, +// }, +// } +// pbSchemaByte, _ := json.Marshal(pbSchema) +// +// data := &model.CDCData{ +// Msg: &msgstream.CreateCollectionMsg{ +// CreateCollectionRequest: msgpb.CreateCollectionRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_CreateCollection, +// }, +// CollectionName: "coll", +// CollectionID: 1001, +// Schema: pbSchemaByte, +// }, +// }, +// Extra: map[string]any{ +// model.ShardNumKey: shardNum, +// model.ConsistencyLevelKey: level, +// model.CollectionPropertiesKey: []*commonpb.KeyValuePair{kv}, +// }, +// } +// +// t.Run("success", func(t *testing.T) { +// createCall := mockMilvusClient.On("CreateCollection", mock.Anything, mock.Anything, shardNum, mock.Anything, mock.Anything). +// Run(func(args mock.Arguments) { +// assert.Len(t, args, 5) +// +// entitySchema := args[1].(*entity.Schema) +// assert.Equal(t, entitySchema.CollectionName, "coll") +// assert.Equal(t, entitySchema.Description, "coll-des") +// assert.True(t, entitySchema.AutoID) +// assert.Len(t, entitySchema.Fields, 2) +// assert.EqualValues(t, 100, entitySchema.Fields[0].ID) +// assert.EqualValues(t, 101, entitySchema.Fields[1].ID) +// }). +// Return(nil) +// defer createCall.Unset() +// successCallbackCall := writerCallback.On("OnSuccess", int64(1001), mock.Anything).Return() +// defer successCallbackCall.Unset() +// err = cdcWriter.Write(context.Background(), data, writerCallback) +// assert.NoError(t, err) +// }) +// +// t.Run("create error", func(t *testing.T) { +// createCall := mockMilvusClient.On("CreateCollection", mock.Anything, mock.Anything, shardNum, mock.Anything, mock.Anything). 
+// Return(errors.New("create error")) +// defer createCall.Unset() +// failCallbackCall := writerCallback.On("OnFail", data, mock.Anything).Return() +// defer failCallbackCall.Unset() +// err = cdcWriter.Write(context.Background(), data, writerCallback) +// assert.NoError(t, err) +// +// // trigger error protect +// err = cdcWriter.Write(context.Background(), data, writerCallback) +// assert.Error(t, err) +// }) +// } +// +// func TestWriterTemplateInsertDeleteDrop(t *testing.T) { +// mockMilvusFactory := mocks.NewMilvusClientFactory(t) +// factoryOption := writer.MilvusFactoryOption(mockMilvusFactory) +// +// //assertPosition := NewAssertPosition() +// newWriter := func(assertPosition *AssertPosition) writer.CDCWriter { +// handler, err := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) +// assert.NoError(t, err) +// return writer.NewCDCWriterTemplate( +// writer.HandlerOption(handler), +// writer.BufferOption(5*time.Second, 10*1024*1024, assertPosition.savePosition), +// writer.ErrorProtectOption(100, time.Second), +// ) +// } +// +// // Binary vector +// // Dimension of binary vector is 32 +// // size := 4, = 32 / 8 +// binaryVector := []byte{255, 255, 255, 0} +// generateInsertData := func(collectionID int64, collectionName string, channelName string, partitionName string, msgID string) *model.CDCData { +// return &model.CDCData{ +// Msg: &msgstream.InsertMsg{ +// InsertRequest: msgpb.InsertRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_Insert, +// }, +// CollectionName: collectionName, +// CollectionID: collectionID, +// PartitionName: partitionName, +// FieldsData: []*schemapb.FieldData{ +// { +// Type: schemapb.DataType_Bool, +// FieldName: "ok", +// FieldId: 101, +// Field: &schemapb.FieldData_Scalars{ +// Scalars: &schemapb.ScalarField{ +// Data: &schemapb.ScalarField_BoolData{ +// BoolData: &schemapb.BoolArray{ +// Data: []bool{true}, +// }, +// }, +// }, +// }, +// }, +// { +// Type: schemapb.DataType_String, +// FieldName: "ok", +// FieldId: 102, +// Field: &schemapb.FieldData_Scalars{ +// Scalars: &schemapb.ScalarField{ +// Data: &schemapb.ScalarField_StringData{ +// StringData: &schemapb.StringArray{ +// Data: []string{"true"}, +// }, +// }, +// }, +// }, +// }, +// { +// Type: schemapb.DataType_BinaryVector, +// FieldName: "ok", +// FieldId: 102, +// Field: &schemapb.FieldData_Vectors{ +// Vectors: &schemapb.VectorField{ +// Dim: 32, +// Data: &schemapb.VectorField_BinaryVector{ +// BinaryVector: binaryVector, +// }, +// }, +// }, +// }, +// }, +// }, +// BaseMsg: msgstream.BaseMsg{ +// EndTimestamp: 1000, +// MsgPosition: &msgstream.MsgPosition{ +// ChannelName: channelName, +// MsgID: []byte(msgID), +// }, +// }, +// }, +// } +// } +// +// generateDeleteData := func(collectionID int64, collectionName string, channelName string, partitionName string, msgID string, ids []int64) *model.CDCData { +// return &model.CDCData{ +// Msg: &msgstream.DeleteMsg{ +// DeleteRequest: msgpb.DeleteRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_Delete, +// }, +// CollectionName: collectionName, +// CollectionID: collectionID, +// PartitionName: partitionName, +// PrimaryKeys: &schemapb.IDs{ +// IdField: &schemapb.IDs_IntId{ +// IntId: &schemapb.LongArray{ +// Data: ids, +// }, +// }, +// }, +// }, +// BaseMsg: msgstream.BaseMsg{ +// EndTimestamp: 1000, +// MsgPosition: &msgstream.MsgPosition{ +// ChannelName: channelName, +// MsgID: []byte(msgID), +// }, +// }, +// }, +// } +// } +// 
+// generateDropData := func(collectionID int64, collectionName string, channelName, msgID, channelName2, msgID2 string) *model.CDCData { +// return &model.CDCData{ +// Msg: &msgstream.DropCollectionMsg{ +// DropCollectionRequest: msgpb.DropCollectionRequest{ +// Base: &commonpb.MsgBase{ +// MsgType: commonpb.MsgType_DropCollection, +// }, +// CollectionName: collectionName, +// CollectionID: collectionID, +// }, +// BaseMsg: msgstream.BaseMsg{ +// EndTimestamp: 1000, +// MsgPosition: &msgstream.MsgPosition{ +// ChannelName: channelName, +// MsgID: []byte(msgID), +// }, +// }, +// }, +// Extra: map[string]any{ +// model.DropCollectionMsgsKey: []*msgstream.DropCollectionMsg{ +// { +// BaseMsg: msgstream.BaseMsg{ +// EndTimestamp: 2000, +// MsgPosition: &msgpb.MsgPosition{ +// ChannelName: channelName2, +// MsgID: []byte(msgID2), +// }, +// }, +// }, +// }, +// }, +// } +// } +// +// t.Run("insert success", func(t *testing.T) { +// assertPosition := NewAssertPosition() +// defer assertPosition.clear() +// writerCallback := mocks.NewWriteCallback(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// +// successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() +// defer successCallbackCall.Unset() +// insertCall := mockMilvusClient.On("Insert", mock.Anything, mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Run(func(args mock.Arguments) { +// collectionName := args[1].(string) +// if collectionName == "coll" { +// boolColumn := args[3].(*entity.ColumnBool) +// assert.Len(t, boolColumn.Data(), 3) +// stringColumn := args[4].(*entity.ColumnString) +// assert.Len(t, stringColumn.Data(), 3) +// byteVectorColumn := args[5].(*entity.ColumnBinaryVector) +// assert.Len(t, byteVectorColumn.Data(), 3) +// } +// }).Return(nil, nil) +// defer insertCall.Unset() +// cdcWriter := newWriter(assertPosition) +// +// err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "a"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1002), "coll2", "a", "part", "c"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "d"), writerCallback) +// assert.NoError(t, err) +// cdcWriter.Flush(context.Background()) +// time.Sleep(2 * time.Second) +// writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) // 3 +// writerCallback.AssertCalled(t, "OnSuccess", int64(1002), mock.Anything) // 1 +// writerCallback.AssertNumberOfCalls(t, "OnSuccess", 4) +// mockMilvusClient.AssertNumberOfCalls(t, "Insert", 2) +// assertPosition.lock.Lock() +// defer assertPosition.lock.Unlock() +// assert.Equal(t, 3, assertPosition.saveNum) +// position := assertPosition.positions[int64(1001)]["a"] +// assert.Equal(t, "a", position.Key) +// assert.Equal(t, "d", string(position.Data)) +// +// position = assertPosition.positions[int64(1001)]["b"] +// assert.Equal(t, "b", position.Key) +// assert.Equal(t, "b", string(position.Data)) +// +// position = assertPosition.positions[int64(1002)]["a"] +// assert.Equal(t, "a", position.Key) +// assert.Equal(t, "c", string(position.Data)) +// }) 
+// +// t.Run("delete success", func(t *testing.T) { +// assertPosition := NewAssertPosition() +// defer assertPosition.clear() +// +// writerCallback := mocks.NewWriteCallback(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() +// defer successCallbackCall.Unset() +// +// deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Run(func(args mock.Arguments) { +// collectionName := args[1].(string) +// if collectionName == "col1" { +// boolColumn := args[3].(*entity.ColumnInt64) +// assert.Len(t, boolColumn.Data(), 9) +// assert.Equal(t, []int64{1, 2, 3, 4, 5, 6, 10, 11, 12}, boolColumn.Data()) +// } +// }).Return(nil) +// defer deleteCall.Unset() +// cdcWriter := newWriter(assertPosition) +// +// err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "b", "part", "b", []int64{4, 5, 6}), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1002), "col2", "a", "part", "c", []int64{7, 8, 9}), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "d", []int64{10, 11, 12}), writerCallback) +// assert.NoError(t, err) +// cdcWriter.Flush(context.Background()) +// time.Sleep(2 * time.Second) +// writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) // 3 +// writerCallback.AssertCalled(t, "OnSuccess", int64(1002), mock.Anything) // 1 +// writerCallback.AssertNumberOfCalls(t, "OnSuccess", 4) +// mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 2) +// assertPosition.lock.Lock() +// defer assertPosition.lock.Unlock() +// assert.Equal(t, 3, assertPosition.saveNum) +// position := assertPosition.positions[int64(1001)]["a"] +// assert.Equal(t, "a", position.Key) +// assert.Equal(t, "d", string(position.Data)) +// +// position = assertPosition.positions[int64(1001)]["b"] +// assert.Equal(t, "b", position.Key) +// assert.Equal(t, "b", string(position.Data)) +// +// position = assertPosition.positions[int64(1002)]["a"] +// assert.Equal(t, "a", position.Key) +// assert.Equal(t, "c", string(position.Data)) +// }) +// +// t.Run("drop success", func(t *testing.T) { +// assertPosition := NewAssertPosition() +// defer assertPosition.clear() +// +// writerCallback := mocks.NewWriteCallback(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() +// defer successCallbackCall.Unset() +// insertCall := mockMilvusClient.On("Insert", mock.Anything, mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) +// defer insertCall.Unset() +// deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(nil) +// defer deleteCall.Unset() +// dropCall := mockMilvusClient.On("DropCollection", mock.Anything, mock.Anything).Return(nil) +// defer 
dropCall.Unset() +// cdcWriter := newWriter(assertPosition) +// +// err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "a"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "coll", "b", "part", "c", []int64{4, 5, 6}), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "a", "part", "d"), writerCallback) +// assert.NoError(t, err) +// err = cdcWriter.Write(context.Background(), generateDropData(int64(1001), "coll", "a", "e", "b", "f"), writerCallback) +// assert.NoError(t, err) +// cdcWriter.Flush(context.Background()) +// +// time.Sleep(2 * time.Second) +// writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) +// writerCallback.AssertNumberOfCalls(t, "OnSuccess", 5) +// mockMilvusClient.AssertNumberOfCalls(t, "Insert", 2) +// mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 1) +// mockMilvusClient.AssertNumberOfCalls(t, "DropCollection", 1) +// assertPosition.lock.Lock() +// defer assertPosition.lock.Unlock() +// assert.Equal(t, 2, assertPosition.saveNum) +// position := assertPosition.positions[int64(1001)]["a"] +// assert.Equal(t, "a", position.Key) +// assert.Equal(t, "e", string(position.Data)) +// +// position = assertPosition.positions[int64(1001)]["b"] +// assert.Equal(t, "b", position.Key) +// assert.Equal(t, "f", string(position.Data)) +// }) +// +// t.Run("flush", func(t *testing.T) { +// assertPosition := NewAssertPosition() +// defer assertPosition.clear() +// +// writerCallback := mocks.NewWriteCallback(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() +// defer successCallbackCall.Unset() +// deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(nil) +// defer deleteCall.Unset() +// +// cdcWriter := newWriter(assertPosition) +// err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) +// assert.NoError(t, err) +// +// // wait the flush time +// time.Sleep(7 * time.Second) +// writerCallback.AssertCalled(t, "OnSuccess", int64(1001), mock.Anything) +// mockMilvusClient.AssertNumberOfCalls(t, "DeleteByPks", 1) +// assertPosition.lock.Lock() +// defer assertPosition.lock.Unlock() +// assert.Equal(t, 1, assertPosition.saveNum) +// }) +// +// t.Run("err", func(t *testing.T) { +// assertPosition := NewAssertPosition() +// defer assertPosition.clear() +// +// writerCallback := mocks.NewWriteCallback(t) +// mockMilvusClient := mocks.NewMilvusClientAPI(t) +// call := mockMilvusFactory.On("NewGrpcClientWithTLSAuth", mock.Anything, address, user, password).Return(mockMilvusClient, nil) +// defer call.Unset() +// successCallbackCall := writerCallback.On("OnSuccess", mock.Anything, mock.Anything).Return() +// defer successCallbackCall.Unset() +// failCallbackCall := writerCallback.On("OnFail", mock.Anything, mock.Anything).Return() +// defer failCallbackCall.Unset() +// insertCall := mockMilvusClient.On("Insert", mock.Anything, 
mock.Anything, "", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) +// defer insertCall.Unset() +// deleteCall := mockMilvusClient.On("DeleteByPks", mock.Anything, mock.Anything, "", mock.Anything).Return(errors.New("delete error")) +// defer deleteCall.Unset() +// +// handler, _ := writer.NewMilvusDataHandler(addressOption, userOption, tlsOption, timeoutOption, ignorePartition, factoryOption) +// cdcWriter := writer.NewCDCWriterTemplate( +// writer.HandlerOption(handler), +// writer.BufferOption(5*time.Second, 10*1024*1024, assertPosition.savePosition), +// writer.ErrorProtectOption(5, time.Second), +// ) +// +// for i := 0; i < 3; i++ { +// err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) +// assert.NoError(t, err) +// } +// cdcWriter.Flush(context.Background()) +// err := cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) +// assert.NoError(t, err) +// time.Sleep(2 * time.Second) +// for i := 0; i < 6; i++ { +// err := cdcWriter.Write(context.Background(), generateDeleteData(int64(1001), "col1", "a", "part", "a", []int64{1, 2, 3}), writerCallback) +// assert.NoError(t, err) +// } +// cdcWriter.Flush(context.Background()) +// time.Sleep(time.Second) +// err = cdcWriter.Write(context.Background(), generateInsertData(int64(1001), "coll", "b", "part", "b"), writerCallback) +// assert.Error(t, err) +// }) +// } diff --git a/server/cdc_api.go b/server/cdc_api.go index c9e62ea3..363fbd89 100644 --- a/server/cdc_api.go +++ b/server/cdc_api.go @@ -16,14 +16,9 @@ package server -import ( - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/server/model/request" -) - -type CDCApi interface { - util.CDCMark +import "github.com/zilliztech/milvus-cdc/server/model/request" +type CDCService interface { ReloadTask() Create(request *request.CreateRequest) (*request.CreateResponse, error) Delete(request *request.DeleteRequest) (*request.DeleteResponse, error) @@ -33,9 +28,7 @@ type CDCApi interface { List(request *request.ListRequest) (*request.ListResponse, error) } -type BaseCDC struct { - util.CDCMark -} +type BaseCDC struct{} func NewBaseCDC() *BaseCDC { return &BaseCDC{} @@ -69,6 +62,6 @@ func (b *BaseCDC) List(request *request.ListRequest) (*request.ListResponse, err return nil, nil } -func GetCDCApi(config *CDCServerConfig) CDCApi { +func GetCDCApi(config *CDCServerConfig) CDCService { return NewMetaCDC(config) } diff --git a/server/cdc_impl.go b/server/cdc_impl.go index d917331e..b845387f 100644 --- a/server/cdc_impl.go +++ b/server/cdc_impl.go @@ -18,32 +18,45 @@ package server import ( "context" - "encoding/base64" "fmt" "strings" "sync" "time" "github.com/cockroachdb/errors" - "github.com/golang/protobuf/proto" "github.com/google/uuid" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream/mqwrapper" + "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" + "github.com/milvus-io/milvus/pkg/log" "github.com/samber/lo" + "go.uber.org/zap" + + "github.com/zilliztech/milvus-cdc/core/api" "github.com/zilliztech/milvus-cdc/core/config" "github.com/zilliztech/milvus-cdc/core/pb" cdcreader "github.com/zilliztech/milvus-cdc/core/reader" "github.com/zilliztech/milvus-cdc/core/util" cdcwriter "github.com/zilliztech/milvus-cdc/core/writer" servererror "github.com/zilliztech/milvus-cdc/server/error" - "github.com/zilliztech/milvus-cdc/server/metrics" 
"github.com/zilliztech/milvus-cdc/server/model" "github.com/zilliztech/milvus-cdc/server/model/meta" "github.com/zilliztech/milvus-cdc/server/model/request" "github.com/zilliztech/milvus-cdc/server/store" - "go.uber.org/zap" ) +const ( + TmpCollectionID = -1 + TmpCollectionName = "-1" +) + +type ReplicateEntity struct { + channelManager api.ChannelManager + targetClient api.TargetAPI + metaOp api.MetaOp + readerObj api.Reader // TODO the reader's counter may be more than one + quitFunc func() + writerObj api.Writer +} + type MetaCDC struct { BaseCDC metaStoreFactory store.MetaStoreFactory @@ -59,9 +72,13 @@ type MetaCDC struct { } cdcTasks struct { sync.RWMutex - data map[string]*CDCTask + data map[string]*meta.TaskInfo + } + // factoryCreator FactoryCreator + replicateEntityMap struct { + sync.RWMutex + data map[string]*ReplicateEntity } - factoryCreator FactoryCreator } func NewMetaCDC(serverConfig *CDCServerConfig) *MetaCDC { @@ -99,8 +116,9 @@ func NewMetaCDC(serverConfig *CDCServerConfig) *MetaCDC { } cdc.collectionNames.data = make(map[string][]string) cdc.collectionNames.excludeData = make(map[string][]string) - cdc.cdcTasks.data = make(map[string]*CDCTask) - cdc.factoryCreator = NewCDCFactory + cdc.cdcTasks.data = make(map[string]*meta.TaskInfo) + // cdc.factoryCreator = NewCDCFactory + cdc.replicateEntityMap.data = make(map[string]*ReplicateEntity) return cdc } @@ -121,31 +139,32 @@ func (e *MetaCDC) ReloadTask() { log.Panic("fail to get all task info", zap.Error(err)) } - if reverse { - var err error - reverseTxn, commitFunc, err := e.metaStoreFactory.Txn(ctx) - if err != nil { - log.Panic("fail to new the reverse txn", zap.Error(err)) - } - for _, taskInfo := range taskInfos { - if taskInfo.MilvusConnectParam.Host == currentConfig.Host && taskInfo.MilvusConnectParam.Port == currentConfig.Port { - taskInfo.MilvusConnectParam.Host = reverseConfig.Host - taskInfo.MilvusConnectParam.Port = reverseConfig.Port - taskInfo.MilvusConnectParam.Username = reverseConfig.Username - taskInfo.MilvusConnectParam.Password = reverseConfig.Password - taskInfo.MilvusConnectParam.EnableTLS = reverseConfig.EnableTLS - if err = e.metaStoreFactory.GetTaskInfoMetaStore(ctx).Put(ctx, taskInfo, reverseTxn); err != nil { - log.Panic("fail to put the task info to metastore when reversing", zap.Error(err)) - } - if err = e.metaStoreFactory.GetTaskCollectionPositionMetaStore(ctx).Delete(ctx, &meta.TaskCollectionPosition{TaskID: taskInfo.TaskID}, reverseTxn); err != nil { - log.Panic("fail to delete the task collection position to metastore when reversing", zap.Error(err)) - } - } - } - if err = commitFunc(err); err != nil { - log.Panic("fail to commit the reverse txn", zap.Error(err)) - } - } + // if reverse { + // var err error + // reverseTxn, commitFunc, err := e.metaStoreFactory.Txn(ctx) + // if err != nil { + // log.Panic("fail to new the reverse txn", zap.Error(err)) + // } + // for _, taskInfo := range taskInfos { + // if taskInfo.MilvusConnectParam.Host == currentConfig.Host && taskInfo.MilvusConnectParam.Port == currentConfig.Port { + // taskInfo.MilvusConnectParam.Host = reverseConfig.Host + // taskInfo.MilvusConnectParam.Port = reverseConfig.Port + // taskInfo.MilvusConnectParam.Username = reverseConfig.Username + // taskInfo.MilvusConnectParam.Password = reverseConfig.Password + // taskInfo.MilvusConnectParam.EnableTLS = reverseConfig.EnableTLS + // if err = e.metaStoreFactory.GetTaskInfoMetaStore(ctx).Put(ctx, taskInfo, reverseTxn); err != nil { + // log.Panic("fail to put the task info to 
metastore when reversing", zap.Error(err)) + // } + // // TODO need to use new target position in the future, not delete and receive the msg from the latest position + // if err = e.metaStoreFactory.GetTaskCollectionPositionMetaStore(ctx).Delete(ctx, &meta.TaskCollectionPosition{TaskID: taskInfo.TaskID}, reverseTxn); err != nil { + // log.Panic("fail to delete the task collection position to metastore when reversing", zap.Error(err)) + // } + // } + // } + // if err = commitFunc(err); err != nil { + // log.Panic("fail to commit the reverse txn", zap.Error(err)) + // } + // } for _, taskInfo := range taskInfos { milvusAddress := fmt.Sprintf("%s:%d", taskInfo.MilvusConnectParam.Host, taskInfo.MilvusConnectParam.Port) @@ -154,15 +173,12 @@ func (e *MetaCDC) ReloadTask() { }) e.collectionNames.data[milvusAddress] = append(e.collectionNames.data[milvusAddress], newCollectionNames...) e.collectionNames.excludeData[milvusAddress] = append(e.collectionNames.excludeData[milvusAddress], taskInfo.ExcludeCollections...) - task, err := e.newCdcTask(taskInfo) - if err != nil { - log.Warn("fail to new cdc task", zap.Any("task_info", taskInfo), zap.Error(err)) - continue - } - if taskInfo.State == meta.TaskStateRunning { - if err = <-task.Resume(nil); err != nil { - log.Warn("fail to start cdc task", zap.Any("task_info", taskInfo), zap.Error(err)) - } + e.cdcTasks.Lock() + e.cdcTasks.data[taskInfo.TaskID] = taskInfo + e.cdcTasks.Unlock() + + if err := e.startInternal(taskInfo, taskInfo.State == meta.TaskStateRunning); err != nil { + log.Panic("fail to start the task", zap.Any("task_info", taskInfo), zap.Error(err)) } } } @@ -254,27 +270,12 @@ func (e *MetaCDC) Create(req *request.CreateRequest) (resp *request.CreateRespon revertCollectionNames() return nil, servererror.NewServerError(errors.WithMessage(err, "fail to put the task info to etcd")) } - - info.State = meta.TaskStateRunning - task, err := e.newCdcTask(info) + e.cdcTasks.Lock() + e.cdcTasks.data[info.TaskID] = info + e.cdcTasks.Unlock() + err = e.startInternal(info, false) if err != nil { - log.Warn("fail to new cdc task", zap.Error(err)) - return nil, servererror.NewServerError(err) - } - if err = <-task.Resume(func() error { - err = store.UpdateTaskState( - e.metaStoreFactory.GetTaskInfoMetaStore(ctx), - info.TaskID, - meta.TaskStateRunning, - []meta.TaskState{meta.TaskStateInitial}) - if err != nil { - log.Warn("fail to update the task meta", zap.Error(err)) - return servererror.NewServerError(errors.WithMessage(err, "fail to update the task meta, task_id: "+info.TaskID)) - } - return nil - }); err != nil { - log.Warn("fail to start cdc task", zap.Error(err)) - return nil, servererror.NewServerError(err) + return nil, err } return &request.CreateResponse{TaskID: info.TaskID}, nil @@ -332,35 +333,41 @@ func (e *MetaCDC) checkCollectionInfos(infos []model.CollectionInfo) error { return servererror.NewClientError("empty collection info") } - var ( - longNames []string - emptyName bool - ) - for _, info := range infos { - if info.Name == "" { - emptyName = true - } - if info.Name == cdcreader.AllCollection && len(infos) > 1 { - return servererror.NewClientError(fmt.Sprintf("make sure the only one collection if you want to use the '*' collection param, current param: %v", - lo.Map(infos, func(t model.CollectionInfo, _ int) string { - return t.Name - }))) - } - if len(info.Name) > e.config.MaxNameLength { - longNames = append(longNames, info.Name) - } - } - if !emptyName && len(longNames) == 0 { - return nil - } - var errMsg string - if emptyName { 
- errMsg += "there is a collection name that is empty. " - } - if len(longNames) > 0 { - errMsg += fmt.Sprintf("there are some collection names whose length exceeds 256 characters, %v", longNames) + if len(infos) != 1 || infos[0].Name != cdcreader.AllCollection { + return servererror.NewClientError("the collection info should be only one, and the collection name should be `*`. Specifying collection name will be supported in the future.") } - return servererror.NewClientError(errMsg) + return nil + + // TODO + // var ( + // longNames []string + // emptyName bool + // ) + // for _, info := range infos { + // if info.Name == "" { + // emptyName = true + // } + // if info.Name == cdcreader.AllCollection && len(infos) > 1 { + // return servererror.NewClientError(fmt.Sprintf("make sure the only one collection if you want to use the '*' collection param, current param: %v", + // lo.Map(infos, func(t model.CollectionInfo, _ int) string { + // return t.Name + // }))) + // } + // if len(info.Name) > e.config.MaxNameLength { + // longNames = append(longNames, info.Name) + // } + // } + // if !emptyName && len(longNames) == 0 { + // return nil + // } + // var errMsg string + // if emptyName { + // errMsg += "there is a collection name that is empty. " + // } + // if len(longNames) > 0 { + // errMsg += fmt.Sprintf("there are some collection names whose length exceeds 256 characters, %v", longNames) + // } + // return servererror.NewClientError(errMsg) } func (e *MetaCDC) getUuid() string { @@ -368,88 +375,38 @@ func (e *MetaCDC) getUuid() string { return strings.ReplaceAll(uid.String(), "-", "") } -func (e *MetaCDC) newCdcTask(info *meta.TaskInfo) (*CDCTask, error) { - metrics.StreamingCollectionCountVec.WithLabelValues(info.TaskID, metrics.TotalStatusLabel).Add(float64(len(info.CollectionInfos))) - - e.cdcTasks.Lock() - e.cdcTasks.data[info.TaskID] = EmptyCdcTask - metrics.TaskNumVec.AddInitial() - e.cdcTasks.Unlock() - - newReaderFunc := NewReaderFunc(func() (cdcreader.CDCReader, error) { - var err error - taskLog := log.With(zap.String("task_id", info.TaskID), zap.Error(err)) - ctx := context.Background() - positions, err := e.metaStoreFactory.GetTaskCollectionPositionMetaStore(ctx).Get(ctx, &meta.TaskCollectionPosition{TaskID: info.TaskID}, nil) +func (e *MetaCDC) startInternal(info *meta.TaskInfo, ignoreUpdateState bool) error { + milvusConnectParam := info.MilvusConnectParam + milvusAddress := fmt.Sprintf("%s:%d", milvusConnectParam.Host, milvusConnectParam.Port) + e.replicateEntityMap.RLock() + replicateEntity, ok := e.replicateEntityMap.data[milvusAddress] + e.replicateEntityMap.RUnlock() + + newReplicateEntity := func() (*ReplicateEntity, error) { + ctx := context.TODO() + timeoutCtx, cancelFunc := context.WithTimeout(ctx, time.Duration(milvusConnectParam.ConnectTimeout)*time.Second) + milvusClient, err := cdcreader.NewTarget(timeoutCtx, cdcreader.TargetConfig{ + Address: milvusAddress, + Username: milvusConnectParam.Username, + Password: milvusConnectParam.Password, + EnableTLS: milvusConnectParam.EnableTLS, + }) + cancelFunc() if err != nil { - taskLog.Warn("fail to get the task collection position", zap.Error(err)) - return nil, errors.WithMessage(err, "fail to get the task meta, task_id: "+info.TaskID) - } - sourceConfig := e.config.SourceConfig - if info.RPCRequestChannelInfo.Name != "" { - channelName := info.RPCRequestChannelInfo.Name - channelPosition := "" - if len(positions) != 0 { - position := positions[0] - if position.CollectionName != util.RPCRequestCollectionName || 
position.CollectionID != util.RPCRequestCollectionID { - log.Panic("the collection name or id is not match the rpc request channel info", zap.Any("position", position)) - } - kp, ok := position.Positions[channelName] - if !ok { - log.Panic("the channel name is not match the rpc request channel info", zap.Any("position", position)) - } - positionBytes, err := proto.Marshal(kp) - if err != nil { - log.Warn("fail to marshal the key data pair", zap.Error(err)) - return nil, err - } - channelPosition = base64.StdEncoding.EncodeToString(positionBytes) - } else if info.RPCRequestChannelInfo.Position != "" { - channelPosition = info.RPCRequestChannelInfo.Position - } - reader, err := cdcreader.NewChannelReader( - cdcreader.MqChannelOption(sourceConfig.Pulsar, sourceConfig.Kafka), - cdcreader.ChannelNameOption(channelName), - cdcreader.SubscriptionPositionChannelOption(mqwrapper.SubscriptionPositionLatest), - cdcreader.SeekPositionChannelOption(channelPosition), - cdcreader.DataChanChannelOption(sourceConfig.ReadChanLen), - ) - if err != nil { - return nil, errors.WithMessage(err, "fail to new the channel reader, task_id: "+info.TaskID) - } - return reader, nil - } - - taskPosition := make(map[string]map[string]*commonpb.KeyDataPair) - for _, position := range positions { - taskPosition[position.CollectionName] = position.Positions - } - - var options []config.Option[*cdcreader.MilvusCollectionReader] - for _, collectionInfo := range info.CollectionInfos { - options = append(options, cdcreader.CollectionInfoOption(collectionInfo.Name, taskPosition[collectionInfo.Name])) + log.Warn("fail to new target", zap.String("address", milvusAddress), zap.Error(err)) + return nil, servererror.NewClientError("fail to connect target milvus server") } - monitor := NewReaderMonitor(info.TaskID) - etcdConfig := config.NewMilvusEtcdConfig(config.MilvusEtcdEndpointsOption(sourceConfig.EtcdAddress), - config.MilvusEtcdRootPathOption(sourceConfig.EtcdRootPath), - config.MilvusEtcdMetaSubPathOption(sourceConfig.EtcdMetaSubPath)) - reader, err := cdcreader.NewMilvusCollectionReader(append(options, - cdcreader.EtcdOption(etcdConfig), - cdcreader.MqOption(sourceConfig.Pulsar, sourceConfig.Kafka), - cdcreader.MonitorOption(monitor), - cdcreader.ShouldReadFuncOption(GetShouldReadFunc(info)), - cdcreader.ChanLenOption(sourceConfig.ReadChanLen))...) 
+ // TODO improve it + bufferSize := e.config.SourceConfig.ReadChanLen + channelManager, err := cdcreader.NewReplicateChannelManager(config.MQConfig{ + Pulsar: e.config.SourceConfig.Pulsar, + Kafka: e.config.SourceConfig.Kafka, + }, milvusClient, bufferSize) if err != nil { - return nil, errors.WithMessage(err, "fail to new the reader, task_id: "+info.TaskID) + log.Warn("fail to create replicate channel manager", zap.Error(err)) + return nil, servererror.NewClientError("fail to create replicate channel manager") } - return reader, nil - }) - - writeCallback := NewWriteCallback(e.metaStoreFactory, e.rootPath, info.TaskID) - newWriterFunc := NewWriterFunc(func() (cdcwriter.CDCWriter, error) { - var err error - taskLog := log.With(zap.String("task_id", info.TaskID), zap.Error(err)) - targetConfig := info.MilvusConnectParam + targetConfig := milvusConnectParam dataHandler, err := cdcwriter.NewMilvusDataHandler( cdcwriter.AddressOption(fmt.Sprintf("%s:%d", targetConfig.Host, targetConfig.Port)), cdcwriter.UserOption(targetConfig.Username, targetConfig.Password), @@ -457,39 +414,137 @@ func (e *MetaCDC) newCdcTask(info *meta.TaskInfo) (*CDCTask, error) { cdcwriter.IgnorePartitionOption(targetConfig.IgnorePartition), cdcwriter.ConnectTimeoutOption(targetConfig.ConnectTimeout)) if err != nil { - taskLog.Warn("fail to new the data handler") - return nil, errors.WithMessage(err, "fail to new the data handler, task_id: "+info.TaskID) + log.Warn("fail to new the data handler", zap.Error(err)) + return nil, servererror.NewClientError("fail to new the data handler, task_id: ") + } + writerObj := cdcwriter.NewChannelWriter(dataHandler, bufferSize) + sourceConfig := e.config.SourceConfig + metaOp, err := cdcreader.NewEtcdOp(sourceConfig.EtcdAddress, sourceConfig.EtcdRootPath, sourceConfig.EtcdMetaSubPath, sourceConfig.DefaultPartitionName) + if err != nil { + log.Warn("fail to new the meta op", zap.Error(err)) + return nil, servererror.NewClientError("fail to new the meta op") } - cacheConfig := info.WriterCacheConfig - writer := cdcwriter.NewCDCWriterTemplate( - cdcwriter.HandlerOption(NewDataHandlerWrapper(info.TaskID, dataHandler)), - cdcwriter.BufferOption(time.Duration(cacheConfig.Period)*time.Second, - int64(cacheConfig.Size), writeCallback.UpdateTaskCollectionPosition)) - return writer, nil - }) + e.replicateEntityMap.Lock() + defer e.replicateEntityMap.Unlock() + entity, ok := e.replicateEntityMap.data[milvusAddress] + if !ok { + entity = &ReplicateEntity{ + targetClient: milvusClient, + channelManager: channelManager, + metaOp: metaOp, + writerObj: writerObj, + } + e.replicateEntityMap.data[milvusAddress] = entity + go func() { + for { + replicateAPIEvent, ok := <-entity.channelManager.GetEventChan() + if !ok { + log.Warn("the replicate api event channel has closed") + return + } + log.Info("receive the replicate api event", zap.Any("event", replicateAPIEvent)) + err := entity.writerObj.HandleReplicateAPIEvent(context.Background(), replicateAPIEvent) + if err != nil { + log.Warn("fail to handle the replicate api event", zap.Error(err)) + } + } + }() + go func() { + writeCallback := NewWriteCallback(e.metaStoreFactory, e.rootPath, info.TaskID) + for { + // TODO how to close them + channelName, ok := <-entity.channelManager.GetChannelChan() + log.Info("start to replicate channel", zap.String("channel", channelName)) + if !ok { + log.Warn("the channel name channel has closed") + return + } + go func(c string) { + for { + msgPack, ok := <-entity.channelManager.GetMsgChan(c) + if !ok { + log.Warn("the 
data channel has closed") + return + } + // TODO debug info, should be deleted + msgTime, _ := util.ParseHybridTs(msgPack.EndTs) + log.Info("timestamp", zap.String("channel", c), zap.Time("timestamp", time.UnixMilli(msgTime))) + pChannel := msgPack.EndPositions[0].GetChannelName() + position, err := entity.writerObj.HandleReplicateMessage(context.Background(), pChannel, msgPack) + if err != nil { + log.Warn("fail to handle the replicate message", zap.Error(err)) + } + if position != nil { + writeCallback.UpdateTaskCollectionPosition(TmpCollectionID, TmpCollectionName, position.GetKey(), position) + } + } + }(channelName) + } + }() + } + return entity, nil + } + if !ok { + var err error + replicateEntity, err = newReplicateEntity() + if err != nil { + return err + } + } - e.cdcTasks.Lock() - defer e.cdcTasks.Unlock() - task := NewCdcTask(info.TaskID, e.factoryCreator(newReaderFunc, newWriterFunc), writeCallback, func() error { - // update the meta task state - err := store.UpdateTaskState( - e.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), + ctx := context.Background() + taskPositions, err := e.metaStoreFactory.GetTaskCollectionPositionMetaStore(ctx).Get(ctx, &meta.TaskCollectionPosition{TaskID: info.TaskID}, nil) + if err != nil { + log.Warn("fail to get the task collection position", zap.Error(err)) + return servererror.NewServerError(errors.WithMessage(err, "fail to get the task collection position")) + } + if len(taskPositions) > 1 { + log.Warn("the task collection position is invalid", zap.Any("task_id", info.TaskID)) + return servererror.NewServerError(errors.New("the task collection position is invalid")) + } + channelSeekPosition := make(map[string]*msgpb.MsgPosition) + if len(taskPositions) == 1 { + log.Info("task seek position", zap.Any("position", taskPositions[0].Positions)) + for _, dataPair := range taskPositions[0].Positions { + channelSeekPosition[dataPair.GetKey()] = &msgpb.MsgPosition{ + ChannelName: dataPair.GetKey(), + MsgID: dataPair.GetData(), + } + } + } + taskReader, err := cdcreader.NewCollectionReader(info.TaskID, replicateEntity.channelManager, replicateEntity.metaOp, channelSeekPosition, GetShouldReadFunc(info)) + if err != nil { + log.Warn("fail to new the collection reader", zap.Error(err)) + return servererror.NewServerError(errors.WithMessage(err, "fail to new the collection reader")) + } + readCtx, cancelReadFunc := context.WithCancel(context.Background()) + e.replicateEntityMap.Lock() + replicateEntity.readerObj = taskReader + replicateEntity.quitFunc = func() { + taskReader.QuitRead(readCtx) + cancelReadFunc() + } + e.replicateEntityMap.Unlock() + + if !ignoreUpdateState { + err = store.UpdateTaskState( + e.metaStoreFactory.GetTaskInfoMetaStore(ctx), info.TaskID, - meta.TaskStatePaused, - []meta.TaskState{meta.TaskStateRunning}) + meta.TaskStateRunning, + []meta.TaskState{meta.TaskStateInitial, meta.TaskStatePaused}) if err != nil { - log.Warn("fail to update the task meta state", zap.String("task_id", info.TaskID), zap.Error(err)) + log.Warn("fail to update the task meta", zap.Error(err)) + return servererror.NewServerError(errors.WithMessage(err, "fail to update the task meta, task_id: "+info.TaskID)) } - return err - }) - e.cdcTasks.data[info.TaskID] = task - return task, nil + } + taskReader.StartRead(readCtx) + return nil } func (e *MetaCDC) Delete(req *request.DeleteRequest) (*request.DeleteResponse, error) { e.cdcTasks.RLock() - cdcTask, ok := e.cdcTasks.data[req.TaskID] + _, ok := e.cdcTasks.data[req.TaskID] e.cdcTasks.RUnlock() if 
!ok { return nil, servererror.NewClientError("not found the task, task_id: " + req.TaskID) @@ -497,30 +552,30 @@ func (e *MetaCDC) Delete(req *request.DeleteRequest) (*request.DeleteResponse, e var err error - err = <-cdcTask.Terminate(func() error { - var info *meta.TaskInfo - info, err = store.DeleteTask(e.metaStoreFactory, e.rootPath, req.TaskID) - if err != nil { - return servererror.NewServerError(errors.WithMessage(err, "fail to delete the task meta, task_id: "+req.TaskID)) - } - milvusAddress := fmt.Sprintf("%s:%d", info.MilvusConnectParam.Host, info.MilvusConnectParam.Port) - collectionNames := info.CollectionNames() - e.collectionNames.Lock() - if collectionNames[0] == cdcreader.AllCollection { - e.collectionNames.excludeData[milvusAddress] = []string{} - } - e.collectionNames.data[milvusAddress] = lo.Without(e.collectionNames.data[milvusAddress], collectionNames...) - e.collectionNames.Unlock() + var info *meta.TaskInfo + info, err = store.DeleteTask(e.metaStoreFactory, e.rootPath, req.TaskID) + if err != nil { + return nil, servererror.NewServerError(errors.WithMessage(err, "fail to delete the task meta, task_id: "+req.TaskID)) + } + milvusAddress := fmt.Sprintf("%s:%d", info.MilvusConnectParam.Host, info.MilvusConnectParam.Port) + collectionNames := info.CollectionNames() + e.collectionNames.Lock() + if collectionNames[0] == cdcreader.AllCollection { + e.collectionNames.excludeData[milvusAddress] = []string{} + } + e.collectionNames.data[milvusAddress] = lo.Without(e.collectionNames.data[milvusAddress], collectionNames...) + e.collectionNames.Unlock() - e.cdcTasks.Lock() - delete(e.cdcTasks.data, req.TaskID) - e.cdcTasks.Unlock() - return err - }) + e.cdcTasks.Lock() + delete(e.cdcTasks.data, req.TaskID) + e.cdcTasks.Unlock() - if err != nil { - return nil, servererror.NewServerError(errors.WithMessage(err, "fail to terminate the task, task_id: "+req.TaskID)) + e.replicateEntityMap.Lock() + if replicateEntity, ok := e.replicateEntityMap.data[milvusAddress]; ok { + replicateEntity.quitFunc() } + delete(e.replicateEntityMap.data, milvusAddress) + e.replicateEntityMap.Unlock() return &request.DeleteResponse{}, err } @@ -534,21 +589,21 @@ func (e *MetaCDC) Pause(req *request.PauseRequest) (*request.PauseResponse, erro } var err error - - err = <-cdcTask.Pause(func() error { - err = store.UpdateTaskState( - e.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), - req.TaskID, - meta.TaskStatePaused, - []meta.TaskState{meta.TaskStateRunning}) - if err != nil { - return servererror.NewServerError(errors.WithMessage(err, "fail to update the task meta, task_id: "+req.TaskID)) - } - return nil - }) + err = store.UpdateTaskState( + e.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), + req.TaskID, + meta.TaskStatePaused, + []meta.TaskState{meta.TaskStateRunning}) if err != nil { - return nil, servererror.NewServerError(errors.WithMessage(err, "fail to pause the task state, task_id: "+req.TaskID)) + return nil, servererror.NewServerError(errors.WithMessage(err, "fail to update the task meta, task_id: "+req.TaskID)) + } + milvusAddress := fmt.Sprintf("%s:%d", cdcTask.MilvusConnectParam.Host, cdcTask.MilvusConnectParam.Port) + e.replicateEntityMap.Lock() + if replicateEntity, ok := e.replicateEntityMap.data[milvusAddress]; ok { + replicateEntity.quitFunc() } + delete(e.replicateEntityMap.data, milvusAddress) + e.replicateEntityMap.Unlock() return &request.PauseResponse{}, err } @@ -561,24 +616,12 @@ func (e *MetaCDC) Resume(req *request.ResumeRequest) 
(*request.ResumeResponse, e return nil, servererror.NewClientError("not found the task, task_id: " + req.TaskID) } - var err error - - err = <-cdcTask.Resume(func() error { - err = store.UpdateTaskState( - e.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), - req.TaskID, - meta.TaskStateRunning, - []meta.TaskState{meta.TaskStatePaused}) - if err != nil { - return servererror.NewServerError(errors.WithMessage(err, "fail to update the task meta, task_id: "+req.TaskID)) - } - return nil - }) - if err != nil { - return nil, servererror.NewServerError(errors.WithMessage(err, "fail to resume the task state, task_id: "+req.TaskID)) + if err := e.startInternal(cdcTask, false); err != nil { + log.Warn("fail to start the task", zap.Error(err)) + return nil, servererror.NewServerError(errors.WithMessage(err, "fail to start the task, task_id: "+req.TaskID)) } - return &request.ResumeResponse{}, err + return &request.ResumeResponse{}, nil } func (e *MetaCDC) Get(req *request.GetRequest) (*request.GetResponse, error) { diff --git a/server/cdc_impl_test.go b/server/cdc_impl_test.go index f68ed582..1d9d1d8e 100644 --- a/server/cdc_impl_test.go +++ b/server/cdc_impl_test.go @@ -22,11 +22,12 @@ import ( "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + clientv3 "go.etcd.io/etcd/client/v3" + "github.com/zilliztech/milvus-cdc/core/mocks" "github.com/zilliztech/milvus-cdc/core/util" "github.com/zilliztech/milvus-cdc/server/model" "github.com/zilliztech/milvus-cdc/server/model/request" - clientv3 "go.etcd.io/etcd/client/v3" ) var ( diff --git a/server/cdc_task.go b/server/cdc_task.go index cef34c37..baaa5491 100644 --- a/server/cdc_task.go +++ b/server/cdc_task.go @@ -16,267 +16,263 @@ package server -import ( - "context" - "fmt" - "strconv" - "sync" - - "github.com/zilliztech/milvus-cdc/server/metrics" - - "github.com/cockroachdb/errors" - "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/milvus-io/milvus/pkg/mq/msgstream" - "github.com/zilliztech/milvus-cdc/core/model" - "github.com/zilliztech/milvus-cdc/core/reader" - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/core/writer" - "github.com/zilliztech/milvus-cdc/server/model/meta" - "go.uber.org/zap" -) - -var EmptyCdcTask = &CDCTask{} - -type signal struct { - state meta.TaskState - done chan error - f func() error -} - -type CDCTask struct { - id string - factory CDCFactory - callback writer.WriteCallback - writeFailFunc func() error - signaler chan *signal - current util.Value[meta.TaskState] - workingLock sync.Mutex -} - -func NewCdcTask(taskID string, f CDCFactory, c writer.WriteCallback, w func() error) *CDCTask { - task := &CDCTask{ - id: taskID, - factory: f, - callback: c, - writeFailFunc: w, - signaler: make(chan *signal), - } - task.current.Store(meta.TaskStateInitial) - go task.handle() - return task -} - -func (c *CDCTask) Pause(p func() error) <-chan error { - d := make(chan error, 1) - c.sendSignal(&signal{meta.TaskStatePaused, d, p}) - return d -} - -func (c *CDCTask) Resume(r func() error) <-chan error { - d := make(chan error, 1) - c.sendSignal(&signal{meta.TaskStateRunning, d, r}) - return d -} - -func (c *CDCTask) Terminate(t func() error) <-chan error { - d := make(chan error, 1) - c.sendSignal(&signal{meta.TaskStateTerminate, d, t}) - return d -} - -func (c *CDCTask) handle() { - done := make(chan struct{}) - for { - s := <-c.signaler - executeF := func() error { - if s.f == nil { - return nil - } - return s.f() - } - 
- if s.state == c.current.Load() { - c.handleDone(s.done, nil) - continue - } - if err := c.stateCheck(s.state); err != nil { - c.handleDone(s.done, err) - continue - } - - switch s.state { - case meta.TaskStateRunning: - cdcReader, err := c.factory.NewReader() - if err != nil { - c.handleDone(s.done, errors.WithMessage(err, "fail to create reader")) - continue - } - cdcWriter, err := c.factory.NewWriter() - if err != nil { - c.handleDone(s.done, errors.WithMessage(err, "fail to create writer")) - continue - } - if err = executeF(); err != nil { - c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", - c.current.Load().String(), meta.TaskStateRunning.String())) - continue - } - go c.work(done, cdcReader, cdcWriter) - c.current.Store(meta.TaskStateRunning) - c.handleDone(s.done, nil) - case meta.TaskStatePaused: - if err := executeF(); err != nil { - c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", - c.current.Load().String(), meta.TaskStatePaused.String())) - continue - } - c.current.Store(meta.TaskStatePaused) - done <- struct{}{} - c.handleDone(s.done, nil) - case meta.TaskStateTerminate: - if err := executeF(); err != nil { - c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", - c.current.Load().String(), meta.TaskStateTerminate.String())) - continue - } - if c.current.Load() == meta.TaskStateRunning { - done <- struct{}{} - } - c.current.Store(meta.TaskStateTerminate) - c.handleDone(s.done, nil) - return - default: - log.Warn("unknown signal", zap.String("signal", s.state.String())) - } - } -} - -func (c *CDCTask) work(done <-chan struct{}, cdcReader reader.CDCReader, cdcWriter writer.CDCWriter) { - c.workingLock.Lock() - defer c.workingLock.Unlock() - - dataChan := cdcReader.StartRead(context.Background()) - writeData := func(data *model.CDCData) { - var msgType string - var count int - var collectionID int64 - if data.Msg.Type() == commonpb.MsgType_Insert { - msg := data.Msg.(*msgstream.InsertMsg) - msgType = commonpb.MsgType_Insert.String() - count = len(msg.RowIDs) - collectionID = msg.CollectionID - } else if data.Msg.Type() == commonpb.MsgType_Delete { - msg := data.Msg.(*msgstream.DeleteMsg) - msgType = commonpb.MsgType_Delete.String() - count = int(msg.NumRows) - collectionID = msg.CollectionID - } - if msgType != "" { - metrics.ReadMsgRowCountVec.WithLabelValues(c.id, strconv.FormatInt(collectionID, 10), msgType).Add(float64(count)) - } - - if err := cdcWriter.Write(context.Background(), data, c.callback); err != nil { - log.Warn("fail to write the data", zap.Any("data", data), zap.Error(err)) - err = <-c.Pause(c.writeFailFunc) - if err != nil { - log.Warn("fail to pause inner", zap.Error(err)) - } - } - } - quit := func() { - cdcReader.QuitRead(context.Background()) - for { - select { - case data := <-dataChan: - writeData(data) - default: - cdcWriter.Flush(context.Background()) - return - } - } - } - - for { - select { - case <-done: - quit() - return - default: - select { - case data := <-dataChan: - writeData(data) - case <-done: - quit() - return - } - } - } -} - -func (c *CDCTask) handleDone(d chan error, err error) { - d <- err - close(d) -} - -func (c *CDCTask) stateCheck(state meta.TaskState) error { - currentState := c.current.Load() - if currentState == state { - return fmt.Errorf("the current task state is similar to the target state, current state: %s", currentState.String()) - } - if state == meta.TaskStatePaused && currentState != 
meta.TaskStateRunning { - return fmt.Errorf("the task state isn't running, current state: %s", currentState.String()) - } - if state == meta.TaskStateRunning && currentState == meta.TaskStateTerminate { - return fmt.Errorf("the task has terminated") - } - return nil -} - -func (c *CDCTask) sendSignal(s *signal) { - if err := c.stateCheck(s.state); err != nil { - log.Warn("fail to check the task state", zap.Error(err)) - c.handleDone(s.done, err) - return - } - - if c.current.Load() == meta.TaskStateTerminate { - log.Warn("the task has terminated") - c.handleDone(s.done, fmt.Errorf("the task has terminated")) - return - } - - c.signaler <- s -} - -//go:generate mockery --name=CDCFactory --filename=cdc_factory_mock.go --output=./mocks -type CDCFactory interface { - util.CDCMark - NewReader() (reader.CDCReader, error) - NewWriter() (writer.CDCWriter, error) -} - -type NewReaderFunc func() (reader.CDCReader, error) -type NewWriterFunc func() (writer.CDCWriter, error) -type FactoryCreator func(readerFunc NewReaderFunc, writerFunc NewWriterFunc) CDCFactory - -type DefaultCDCFactory struct { - util.CDCMark - newReader NewReaderFunc - newWriter NewWriterFunc -} - -func NewDefaultCDCFactory(r NewReaderFunc, w NewWriterFunc) CDCFactory { - return &DefaultCDCFactory{newReader: r, newWriter: w} -} - -func (d *DefaultCDCFactory) NewReader() (reader.CDCReader, error) { - return d.newReader() -} - -func (d *DefaultCDCFactory) NewWriter() (writer.CDCWriter, error) { - return d.newWriter() -} - -func NewCDCFactory(readerFunc NewReaderFunc, writerFunc NewWriterFunc) CDCFactory { - return NewDefaultCDCFactory(readerFunc, writerFunc) -} +// import ( +// "context" +// "fmt" +// "sync" +// +// "github.com/cockroachdb/errors" +// "go.uber.org/zap" +// +// "github.com/zilliztech/milvus-cdc/core/api" +// "github.com/zilliztech/milvus-cdc/core/util" +// "github.com/zilliztech/milvus-cdc/server/model/meta" +// ) +// +// var EmptyCdcTask = &CDCTask{} +// +// type signal struct { +// state meta.TaskState +// done chan error +// f func() error +// } +// +// type CDCTask struct { +// id string +// // factory CDCFactory +// // callback writer.WriteCallback +// writeFailFunc func() error +// signaler chan *signal +// current util.Value[meta.TaskState] +// workingLock sync.Mutex +// readerObj api.Reader +// } +// +// func NewCdcTask(taskID string, r api.Reader, w func() error) *CDCTask { +// task := &CDCTask{ +// id: taskID, +// // factory: f, +// // callback: c, +// writeFailFunc: w, +// signaler: make(chan *signal), +// readerObj: r, +// } +// task.current.Store(meta.TaskStateInitial) +// go task.handle() +// return task +// } +// +// func (c *CDCTask) Pause(p func() error) <-chan error { +// d := make(chan error, 1) +// c.sendSignal(&signal{meta.TaskStatePaused, d, p}) +// return d +// } +// +// func (c *CDCTask) Resume(r func() error) <-chan error { +// d := make(chan error, 1) +// c.sendSignal(&signal{meta.TaskStateRunning, d, r}) +// return d +// } +// +// func (c *CDCTask) Terminate(t func() error) <-chan error { +// d := make(chan error, 1) +// c.sendSignal(&signal{meta.TaskStateTerminate, d, t}) +// return d +// } +// +// func (c *CDCTask) handle() { +// done := make(chan struct{}) +// for { +// s := <-c.signaler +// executeF := func() error { +// if s.f == nil { +// return nil +// } +// return s.f() +// } +// +// if s.state == c.current.Load() { +// c.handleDone(s.done, nil) +// continue +// } +// if err := c.stateCheck(s.state); err != nil { +// c.handleDone(s.done, err) +// continue +// } +// +// switch s.state 
{ +// case meta.TaskStateRunning: +// // cdcReader, err := c.factory.NewReader() +// // if err != nil { +// // c.handleDone(s.done, errors.WithMessage(err, "fail to create reader")) +// // continue +// // } +// // cdcWriter, err := c.factory.NewWriter() +// // if err != nil { +// // c.handleDone(s.done, errors.WithMessage(err, "fail to create writer")) +// // continue +// // } +// if err := executeF(); err != nil { +// c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", +// c.current.Load().String(), meta.TaskStateRunning.String())) +// continue +// } +// c.readerObj.StartRead(context.Background()) +// // go c.work(done, cdcReader, cdcWriter) +// c.current.Store(meta.TaskStateRunning) +// c.handleDone(s.done, nil) +// case meta.TaskStatePaused: +// if err := executeF(); err != nil { +// c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", +// c.current.Load().String(), meta.TaskStatePaused.String())) +// continue +// } +// c.current.Store(meta.TaskStatePaused) +// done <- struct{}{} +// c.handleDone(s.done, nil) +// case meta.TaskStateTerminate: +// if err := executeF(); err != nil { +// c.handleDone(s.done, errors.WithMessagef(err, "fail to change the task state, from %s to %s", +// c.current.Load().String(), meta.TaskStateTerminate.String())) +// continue +// } +// if c.current.Load() == meta.TaskStateRunning { +// done <- struct{}{} +// } +// c.current.Store(meta.TaskStateTerminate) +// c.handleDone(s.done, nil) +// return +// default: +// log.Warn("unknown signal", zap.String("signal", s.state.String())) +// } +// } +// } +// +// // func (c *CDCTask) work(done <-chan struct{}, cdcReader reader.CDCReader, cdcWriter writer.CDCWriter) { +// // c.workingLock.Lock() +// // defer c.workingLock.Unlock() +// // +// // dataChan := cdcReader.StartRead(context.Background()) +// // writeData := func(data *model.CDCData) { +// // var msgType string +// // var count int +// // var collectionID int64 +// // if data.Msg.Type() == commonpb.MsgType_Insert { +// // msg := data.Msg.(*msgstream.InsertMsg) +// // msgType = commonpb.MsgType_Insert.String() +// // count = len(msg.RowIDs) +// // collectionID = msg.TmpCollectionID +// // } else if data.Msg.Type() == commonpb.MsgType_Delete { +// // msg := data.Msg.(*msgstream.DeleteMsg) +// // msgType = commonpb.MsgType_Delete.String() +// // count = int(msg.NumRows) +// // collectionID = msg.TmpCollectionID +// // } +// // if msgType != "" { +// // metrics.ReadMsgRowCountVec.WithLabelValues(c.id, strconv.FormatInt(collectionID, 10), msgType).Add(float64(count)) +// // } +// // +// // if err := cdcWriter.Write(context.Background(), data, c.callback); err != nil { +// // log.Warn("fail to write the data", zap.Any("data", data), zap.Error(err)) +// // err = <-c.Pause(c.writeFailFunc) +// // if err != nil { +// // log.Warn("fail to pause inner", zap.Error(err)) +// // } +// // } +// // } +// // quit := func() { +// // cdcReader.QuitRead(context.Background()) +// // for { +// // select { +// // case data := <-dataChan: +// // writeData(data) +// // default: +// // cdcWriter.Flush(context.Background()) +// // return +// // } +// // } +// // } +// // +// // for { +// // select { +// // case <-done: +// // quit() +// // return +// // default: +// // select { +// // case data := <-dataChan: +// // writeData(data) +// // case <-done: +// // quit() +// // return +// // } +// // } +// // } +// // } +// +// func (c *CDCTask) handleDone(d chan error, err error) { +// d <- err +// close(d) +// 
} +// +// func (c *CDCTask) stateCheck(state meta.TaskState) error { +// currentState := c.current.Load() +// if currentState == state { +// return fmt.Errorf("the current task state is similar to the target state, current state: %s", currentState.String()) +// } +// if state == meta.TaskStatePaused && currentState != meta.TaskStateRunning { +// return fmt.Errorf("the task state isn't running, current state: %s", currentState.String()) +// } +// if state == meta.TaskStateRunning && currentState == meta.TaskStateTerminate { +// return fmt.Errorf("the task has terminated") +// } +// return nil +// } +// +// func (c *CDCTask) sendSignal(s *signal) { +// if err := c.stateCheck(s.state); err != nil { +// log.Warn("fail to check the task state", zap.Error(err)) +// c.handleDone(s.done, err) +// return +// } +// +// if c.current.Load() == meta.TaskStateTerminate { +// log.Warn("the task has terminated") +// c.handleDone(s.done, fmt.Errorf("the task has terminated")) +// return +// } +// +// c.signaler <- s +// } +// +// // type CDCFactory interface { +// // util.CDCMark +// // NewReader() (reader.CDCReader, error) +// // NewWriter() (writer.CDCWriter, error) +// // } +// // +// // type NewReaderFunc func() (reader.CDCReader, error) +// // type NewWriterFunc func() (writer.CDCWriter, error) +// // type FactoryCreator func(readerFunc NewReaderFunc, writerFunc NewWriterFunc) CDCFactory +// // +// // type DefaultCDCFactory struct { +// // util.CDCMark +// // newReader NewReaderFunc +// // newWriter NewWriterFunc +// // } +// // +// // func NewDefaultCDCFactory(r NewReaderFunc, w NewWriterFunc) CDCFactory { +// // return &DefaultCDCFactory{newReader: r, newWriter: w} +// // } +// // +// // func (d *DefaultCDCFactory) NewReader() (reader.CDCReader, error) { +// // return d.newReader() +// // } +// // +// // func (d *DefaultCDCFactory) NewWriter() (writer.CDCWriter, error) { +// // return d.newWriter() +// // } +// // +// // func NewCDCFactory(readerFunc NewReaderFunc, writerFunc NewWriterFunc) CDCFactory { +// // return NewDefaultCDCFactory(readerFunc, writerFunc) +// // } diff --git a/server/cdc_task_test.go b/server/cdc_task_test.go index b62a172a..b0c9d26d 100644 --- a/server/cdc_task_test.go +++ b/server/cdc_task_test.go @@ -20,19 +20,18 @@ import ( "testing" "time" + "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus/pkg/mq/msgstream" - - "github.com/zilliztech/milvus-cdc/server/model/meta" - - "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/zilliztech/milvus-cdc/core/mocks" "github.com/zilliztech/milvus-cdc/core/model" "github.com/zilliztech/milvus-cdc/core/reader" "github.com/zilliztech/milvus-cdc/core/writer" + "github.com/zilliztech/milvus-cdc/server/model/meta" ) func TestInvalidOpCDCTask(t *testing.T) { diff --git a/server/configs/cdc.yaml b/server/configs/cdc.yaml index fe66ab12..480f88c5 100644 --- a/server/configs/cdc.yaml +++ b/server/configs/cdc.yaml @@ -4,8 +4,7 @@ metaStoreConfig: storeType: etcd etcdEndpoints: - localhost:2379 -# mysqlSourceUrl: root:root@tcp(127.0.0.1:3306)/milvus-cdc?charset=utf8 - mysqlSourceUrl: milvuscdc:1qaz@WSX@tcp(dev-vdc-mysql.cluster-c0ybmd1el2xt.us-west-2.rds.amazonaws.com:3306)/milvuscdc?charset=utf8 + mysqlSourceUrl: root:root@tcp(127.0.0.1:3306)/milvus-cdc?charset=utf8 rootPath: cdc sourceConfig: etcdAddress: diff --git a/server/data_handler_wrapper.go 
b/server/data_handler_wrapper.go index 556ebc60..97d31ec1 100644 --- a/server/data_handler_wrapper.go +++ b/server/data_handler_wrapper.go @@ -16,127 +16,135 @@ package server -import ( - "context" - - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/core/writer" - "github.com/zilliztech/milvus-cdc/server/metrics" -) - -type DataHandlerWrapper struct { - writer.DefaultDataHandler - taskID string - handler writer.CDCDataHandler -} - -func NewDataHandlerWrapper(taskID string, handler writer.CDCDataHandler) writer.CDCDataHandler { - return &DataHandlerWrapper{ - taskID: taskID, - handler: handler, - } -} - -func (d *DataHandlerWrapper) metric(collectionName string, apiType string, isErr bool) { - if isErr { - metrics.APIExecuteCountVec.WithLabelValues(d.taskID, collectionName, apiType, metrics.FailStatusLabel).Inc() - return - } - metrics.APIExecuteCountVec.WithLabelValues(d.taskID, collectionName, apiType, metrics.SuccessStatusLabel).Inc() -} - -func (d *DataHandlerWrapper) CreateCollection(ctx context.Context, param *writer.CreateCollectionParam) (err error) { - defer func() { - d.metric(param.Schema.CollectionName, "CreateCollection", err != nil) - }() - err = d.handler.CreateCollection(ctx, param) - return -} - -func (d *DataHandlerWrapper) DropCollection(ctx context.Context, param *writer.DropCollectionParam) (err error) { - defer func() { - d.metric(param.CollectionName, "DropCollection", err != nil) - }() - err = d.handler.DropCollection(ctx, param) - return -} - -func (d *DataHandlerWrapper) Insert(ctx context.Context, param *writer.InsertParam) (err error) { - defer func() { - d.metric(param.CollectionName, "Insert", err != nil) - }() - err = d.handler.Insert(ctx, param) - return -} - -func (d *DataHandlerWrapper) Delete(ctx context.Context, param *writer.DeleteParam) (err error) { - defer func() { - d.metric(param.CollectionName, "Delete", err != nil) - }() - err = d.handler.Delete(ctx, param) - return -} - -func (d *DataHandlerWrapper) CreatePartition(ctx context.Context, param *writer.CreatePartitionParam) (err error) { - defer func() { - d.metric(param.CollectionName, "CreatePartition", err != nil) - }() - err = d.handler.CreatePartition(ctx, param) - return -} - -func (d *DataHandlerWrapper) DropPartition(ctx context.Context, param *writer.DropPartitionParam) (err error) { - defer func() { - d.metric(param.CollectionName, "DropPartition", err != nil) - }() - err = d.handler.DropPartition(ctx, param) - return -} - -func (d *DataHandlerWrapper) CreateIndex(ctx context.Context, param *writer.CreateIndexParam) (err error) { - defer func() { - d.metric(param.CollectionName, "CreateIndex", err != nil) - }() - err = d.handler.CreateIndex(ctx, param) - return -} - -func (d *DataHandlerWrapper) DropIndex(ctx context.Context, param *writer.DropIndexParam) (err error) { - defer func() { - d.metric(param.CollectionName, "DropIndex", err != nil) - }() - err = d.handler.DropIndex(ctx, param) - return -} - -func (d *DataHandlerWrapper) LoadCollection(ctx context.Context, param *writer.LoadCollectionParam) (err error) { - defer func() { - d.metric(param.CollectionName, "LoadCollection", err != nil) - }() - err = d.handler.LoadCollection(ctx, param) - return -} - -func (d *DataHandlerWrapper) ReleaseCollection(ctx context.Context, param *writer.ReleaseCollectionParam) (err error) { - defer func() { - d.metric(param.CollectionName, "ReleaseCollection", err != nil) - }() - err = d.handler.ReleaseCollection(ctx, param) - return -} - -func (d *DataHandlerWrapper) 
CreateDatabase(ctx context.Context, param *writer.CreateDataBaseParam) (err error) { - defer func() { - d.metric(util.RPCRequestCollectionName, "CreateDatabase", err != nil) - }() - err = d.handler.CreateDatabase(ctx, param) - return -} - -func (d *DataHandlerWrapper) DropDatabase(ctx context.Context, param *writer.DropDataBaseParam) (err error) { - defer func() { - d.metric(util.RPCRequestCollectionName, "DropDatabase", err != nil) - }() - err = d.handler.DropDatabase(ctx, param) - return -} +// import ( +// "context" +// +// "github.com/zilliztech/milvus-cdc/core/util" +// "github.com/zilliztech/milvus-cdc/core/writer" +// "github.com/zilliztech/milvus-cdc/server/metrics" +// ) +// +// type DataHandlerWrapper struct { +// writer.DefaultDataHandler +// taskID string +// handler writer.CDCDataHandler +// } +// +// func NewDataHandlerWrapper(taskID string, handler writer.CDCDataHandler) writer.CDCDataHandler { +// return &DataHandlerWrapper{ +// taskID: taskID, +// handler: handler, +// } +// } +// +// func (d *DataHandlerWrapper) metric(collectionName string, apiType string, isErr bool) { +// if isErr { +// metrics.APIExecuteCountVec.WithLabelValues(d.taskID, collectionName, apiType, metrics.FailStatusLabel).Inc() +// return +// } +// metrics.APIExecuteCountVec.WithLabelValues(d.taskID, collectionName, apiType, metrics.SuccessStatusLabel).Inc() +// } +// +// func (d *DataHandlerWrapper) CreateCollection(ctx context.Context, param *writer.CreateCollectionParam) (err error) { +// defer func() { +// d.metric(param.Schema.CollectionName, "CreateCollection", err != nil) +// }() +// err = d.handler.CreateCollection(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) DropCollection(ctx context.Context, param *writer.DropCollectionParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "DropCollection", err != nil) +// }() +// err = d.handler.DropCollection(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) Insert(ctx context.Context, param *writer.InsertParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "Insert", err != nil) +// }() +// err = d.handler.Insert(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) Delete(ctx context.Context, param *writer.DeleteParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "Delete", err != nil) +// }() +// err = d.handler.Delete(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) CreatePartition(ctx context.Context, param *writer.CreatePartitionParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "CreatePartition", err != nil) +// }() +// err = d.handler.CreatePartition(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) DropPartition(ctx context.Context, param *writer.DropPartitionParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "DropPartition", err != nil) +// }() +// err = d.handler.DropPartition(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) CreateIndex(ctx context.Context, param *writer.CreateIndexParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "CreateIndex", err != nil) +// }() +// err = d.handler.CreateIndex(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) DropIndex(ctx context.Context, param *writer.DropIndexParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "DropIndex", err != nil) +// }() +// err = d.handler.DropIndex(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) 
LoadCollection(ctx context.Context, param *writer.LoadCollectionParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "LoadCollection", err != nil) +// }() +// err = d.handler.LoadCollection(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) ReleaseCollection(ctx context.Context, param *writer.ReleaseCollectionParam) (err error) { +// defer func() { +// d.metric(param.CollectionName, "ReleaseCollection", err != nil) +// }() +// err = d.handler.ReleaseCollection(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) CreateDatabase(ctx context.Context, param *writer.CreateDataBaseParam) (err error) { +// defer func() { +// d.metric(util.RPCRequestCollectionName, "CreateDatabase", err != nil) +// }() +// err = d.handler.CreateDatabase(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) DropDatabase(ctx context.Context, param *writer.DropDataBaseParam) (err error) { +// defer func() { +// d.metric(util.RPCRequestCollectionName, "DropDatabase", err != nil) +// }() +// err = d.handler.DropDatabase(ctx, param) +// return +// } +// +// func (d *DataHandlerWrapper) ReplicateMessage(ctx context.Context, param *writer.ReplicateMessageParam) (err error) { +// defer func() { +// d.metric(util.RPCRequestCollectionName, "ReplicateMessage", err != nil) +// }() +// err = d.handler.ReplicateMessage(ctx, param) +// return +// } diff --git a/server/go.mod b/server/go.mod index 1f8cbbb4..f3634b96 100644 --- a/server/go.mod +++ b/server/go.mod @@ -6,19 +6,16 @@ require ( github.com/cockroachdb/errors v1.9.1 github.com/go-sql-driver/mysql v1.7.1 github.com/goccy/go-json v0.10.2 - github.com/golang/protobuf v1.5.3 github.com/google/uuid v1.3.0 - github.com/milvus-io/milvus-proto/go-api/v2 v2.3.0-dev.1.0.20230716112827-c3fe148f5e1d + github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab github.com/milvus-io/milvus/pkg v0.0.2-0.20230823021022-7af0f7d90cee github.com/mitchellh/mapstructure v1.5.0 github.com/prometheus/client_golang v1.14.0 github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.8.3 github.com/zilliztech/milvus-cdc/core v0.0.1 - go.etcd.io/etcd/api/v3 v3.5.5 go.etcd.io/etcd/client/v3 v3.5.5 go.uber.org/zap v1.21.0 - google.golang.org/grpc v1.54.0 sigs.k8s.io/yaml v1.2.0 ) @@ -53,8 +50,9 @@ require ( github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.4 // indirect - github.com/google/btree v1.0.1 // indirect + github.com/google/btree v1.1.2 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect @@ -111,6 +109,7 @@ require ( github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.etcd.io/bbolt v1.3.6 // indirect + go.etcd.io/etcd/api/v3 v3.5.5 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.5 // indirect go.etcd.io/etcd/client/v2 v2.305.5 // indirect go.etcd.io/etcd/pkg/v3 v3.5.5 // indirect @@ -138,6 +137,7 @@ require ( golang.org/x/time v0.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633 // indirect + google.golang.org/grpc v1.54.0 // indirect google.golang.org/protobuf v1.30.0 // indirect gopkg.in/ini.v1 v1.62.0 // indirect 
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect @@ -147,7 +147,8 @@ require ( replace ( github.com/apache/pulsar-client-go => github.com/milvus-io/pulsar-client-go v0.6.10 - github.com/milvus-io/milvus/pkg => github.com/SimFG/milvus/pkg v0.0.0-20230823080606-a88e7c27d190 + github.com/milvus-io/milvus-sdk-go/v2 => github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c + github.com/milvus-io/milvus/pkg => github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615 github.com/streamnative/pulsarctl => github.com/xiaofan-luan/pulsarctl v0.5.1 github.com/tecbot/gorocksdb => ./../rocksdb github.com/zilliztech/milvus-cdc/core => ../core diff --git a/server/go.sum b/server/go.sum index 093b07ab..cb81fe11 100644 --- a/server/go.sum +++ b/server/go.sum @@ -57,8 +57,10 @@ github.com/DataDog/zstd v1.5.0/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwS github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= -github.com/SimFG/milvus/pkg v0.0.0-20230823080606-a88e7c27d190 h1:bFSt+ZoBSj0NP6yjixW5hS6RR6snda9Czff1YMkM+y0= -github.com/SimFG/milvus/pkg v0.0.0-20230823080606-a88e7c27d190/go.mod h1:s5pCBW6tOsKxj7uTe8AvS2mbJ4LgxGZWxbnY8XoKRS0= +github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c h1:1S40miUo1RfuucrFheszLHW9j4nqR3aFFH0QUAifRAA= +github.com/SimFG/milvus-sdk-go/v2 v2.0.0-20230919094145-06acf1ab753c/go.mod h1:O1gKEbj3snNwETxglX0y3nIZNdQnWP57txQ1nphpM1o= +github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615 h1:dErrHuWnWHWxSbC58BnTwZW1wLNDi9WqwbMbINNQS2w= +github.com/SimFG/milvus/pkg v0.0.0-20230925083123-8c1cd0c4b615/go.mod h1:SGW0KrIpHihP4GmWVfiQDW3p/YsQijrv0aeu5tNvyE8= github.com/actgardner/gogen-avro/v10 v10.1.0/go.mod h1:o+ybmVjEa27AAr35FRqU98DJu1fXES56uXniYFv4yDA= github.com/actgardner/gogen-avro/v10 v10.2.1/go.mod h1:QUhjeHPchheYmMDni/Nx7VB0RsT/ee8YIgGY/xpEQgQ= github.com/actgardner/gogen-avro/v9 v9.1.0/go.mod h1:nyTj6wPqDJoxM3qdnjcLv+EnMDSDFqE0qDpva2QRmKc= @@ -281,8 +283,9 @@ github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= +github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= +github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -472,10 +475,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod 
h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.3.0-dev.1.0.20230716112827-c3fe148f5e1d h1:XsQQ/MigebXEE2VXPKKmA3K7OHC+mkEUiErWvaWMikI= -github.com/milvus-io/milvus-proto/go-api/v2 v2.3.0-dev.1.0.20230716112827-c3fe148f5e1d/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= -github.com/milvus-io/milvus-sdk-go/v2 v2.2.1-0.20230814034926-dd5a31f64225 h1:zmBNiRr/WUHGP2AMb0HxrECe8cWCdpPOpljAJfDlwPI= -github.com/milvus-io/milvus-sdk-go/v2 v2.2.1-0.20230814034926-dd5a31f64225/go.mod h1:hmrgMsXp/uFtCSnUkDzVQr4FK31x5seCKAIqkJO+uRM= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab h1:XJtS30t72wLCnhI9EzaWCA8Hr5zROHj2/S+YYV4+hIs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.3.2-0.20230919092633-6ef446ad2aab/go.mod h1:1OIl0v5PQeNxIJhCvY+K55CBUOYDZevw9g9380u1Wek= github.com/milvus-io/pulsar-client-go v0.6.10 h1:eqpJjU+/QX0iIhEo3nhOqMNXL+TyInAs1IAHZCrCM/A= github.com/milvus-io/pulsar-client-go v0.6.10/go.mod h1:lQqCkgwDF8YFYjKA+zOheTk1tev2B+bKj5j7+nm8M1w= github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g= @@ -546,8 +547,8 @@ github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCko github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/server/handle_map.go b/server/handle_map.go index c13827cc..a7f05aff 100644 --- a/server/handle_map.go +++ b/server/handle_map.go @@ -28,7 +28,7 @@ var ( type requestHandler struct { generateModel func() any - handle func(api CDCApi, request any) (any, error) + handle func(api CDCService, request any) (any, error) } func init() { @@ -37,7 +37,7 @@ func init() { generateModel: func() any { return &modelrequest.CreateRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { createRequest, ok := request.(*modelrequest.CreateRequest) if !ok { return nil, errors.New("fail to cast the request to the create model") @@ -49,7 +49,7 @@ func init() { generateModel: func() any { return &modelrequest.DeleteRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { deleteRequest, ok := request.(*modelrequest.DeleteRequest) if !ok { return nil, errors.New("fail to cast the request to the delete model") @@ -61,7 +61,7 @@ func init() { generateModel: func() any { return &modelrequest.PauseRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { pauseRequest, ok := request.(*modelrequest.PauseRequest) if !ok { return nil, errors.New("fail to cast the request to the pause model") @@ -73,7 +73,7 @@ func init() { generateModel: func() any { return 
&modelrequest.ResumeRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { resumeRequest, ok := request.(*modelrequest.ResumeRequest) if !ok { return nil, errors.New("fail to cast the request to the resume model") @@ -85,7 +85,7 @@ func init() { generateModel: func() any { return &modelrequest.GetRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { getRequest, ok := request.(*modelrequest.GetRequest) if !ok { return nil, errors.New("fail to cast the request to the get model") @@ -97,7 +97,7 @@ func init() { generateModel: func() any { return &modelrequest.ListRequest{} }, - handle: func(api CDCApi, request any) (any, error) { + handle: func(api CDCService, request any) (any, error) { listRequest, ok := request.(*modelrequest.ListRequest) if !ok { return nil, errors.New("fail to cast the request to the list model") diff --git a/server/handle_map_test.go b/server/handle_map_test.go index 5a6ea633..cb641ea2 100644 --- a/server/handle_map_test.go +++ b/server/handle_map_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/zilliztech/milvus-cdc/server/model/request" ) diff --git a/server/main/main.go b/server/main/main.go index 39ba9447..0f3f9bac 100644 --- a/server/main/main.go +++ b/server/main/main.go @@ -19,13 +19,18 @@ package main import ( "os" - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/server" + "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/util/paramtable" "go.uber.org/zap" "sigs.k8s.io/yaml" + + "github.com/zilliztech/milvus-cdc/server" ) func main() { + // TODO check it + paramtable.Init() + s := &server.CDCServer{} // parse config file @@ -33,7 +38,7 @@ func main() { var serverConfig server.CDCServerConfig err := yaml.Unmarshal(fileContent, &serverConfig) if err != nil { - util.Log.Panic("Failed to parse config file", zap.Error(err)) + log.Panic("Failed to parse config file", zap.Error(err)) } s.Run(&serverConfig) } diff --git a/server/metrics/metrics_task_num.go b/server/metrics/metrics_task_num.go index 93bc46f4..3a130ddf 100644 --- a/server/metrics/metrics_task_num.go +++ b/server/metrics/metrics_task_num.go @@ -17,13 +17,12 @@ package metrics import ( + "github.com/milvus-io/milvus/pkg/log" "github.com/prometheus/client_golang/prometheus" - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/server/model/meta" "go.uber.org/zap" -) -var log = util.Log + "github.com/zilliztech/milvus-cdc/server/model/meta" +) type TaskNumMetric struct { metricDesc *prometheus.Desc diff --git a/server/mocks/cdc_factory.go b/server/mocks/cdc_factory.go new file mode 100644 index 00000000..5b8984e0 --- /dev/null +++ b/server/mocks/cdc_factory.go @@ -0,0 +1,175 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. 
+ +package mocks + +import ( + mock "github.com/stretchr/testify/mock" + reader "github.com/zilliztech/milvus-cdc/core/reader" + + writer "github.com/zilliztech/milvus-cdc/core/writer" +) + +// CDCFactory is an autogenerated mock type for the CDCFactory type +type CDCFactory struct { + mock.Mock +} + +type CDCFactory_Expecter struct { + mock *mock.Mock +} + +func (_m *CDCFactory) EXPECT() *CDCFactory_Expecter { + return &CDCFactory_Expecter{mock: &_m.Mock} +} + +// NewReader provides a mock function with given fields: +func (_m *CDCFactory) NewReader() (reader.CDCReader, error) { + ret := _m.Called() + + var r0 reader.CDCReader + var r1 error + if rf, ok := ret.Get(0).(func() (reader.CDCReader, error)); ok { + return rf() + } + if rf, ok := ret.Get(0).(func() reader.CDCReader); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(reader.CDCReader) + } + } + + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCFactory_NewReader_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'NewReader' +type CDCFactory_NewReader_Call struct { + *mock.Call +} + +// NewReader is a helper method to define mock.On call +func (_e *CDCFactory_Expecter) NewReader() *CDCFactory_NewReader_Call { + return &CDCFactory_NewReader_Call{Call: _e.mock.On("NewReader")} +} + +func (_c *CDCFactory_NewReader_Call) Run(run func()) *CDCFactory_NewReader_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CDCFactory_NewReader_Call) Return(_a0 reader.CDCReader, _a1 error) *CDCFactory_NewReader_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCFactory_NewReader_Call) RunAndReturn(run func() (reader.CDCReader, error)) *CDCFactory_NewReader_Call { + _c.Call.Return(run) + return _c +} + +// NewWriter provides a mock function with given fields: +func (_m *CDCFactory) NewWriter() (writer.CDCWriter, error) { + ret := _m.Called() + + var r0 writer.CDCWriter + var r1 error + if rf, ok := ret.Get(0).(func() (writer.CDCWriter, error)); ok { + return rf() + } + if rf, ok := ret.Get(0).(func() writer.CDCWriter); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(writer.CDCWriter) + } + } + + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCFactory_NewWriter_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'NewWriter' +type CDCFactory_NewWriter_Call struct { + *mock.Call +} + +// NewWriter is a helper method to define mock.On call +func (_e *CDCFactory_Expecter) NewWriter() *CDCFactory_NewWriter_Call { + return &CDCFactory_NewWriter_Call{Call: _e.mock.On("NewWriter")} +} + +func (_c *CDCFactory_NewWriter_Call) Run(run func()) *CDCFactory_NewWriter_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CDCFactory_NewWriter_Call) Return(_a0 writer.CDCWriter, _a1 error) *CDCFactory_NewWriter_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCFactory_NewWriter_Call) RunAndReturn(run func() (writer.CDCWriter, error)) *CDCFactory_NewWriter_Call { + _c.Call.Return(run) + return _c +} + +// cdc provides a mock function with given fields: +func (_m *CDCFactory) cdc() { + _m.Called() +} + +// CDCFactory_cdc_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'cdc' +type CDCFactory_cdc_Call struct { + *mock.Call +} + +// cdc is a helper method to 
define mock.On call +func (_e *CDCFactory_Expecter) cdc() *CDCFactory_cdc_Call { + return &CDCFactory_cdc_Call{Call: _e.mock.On("cdc")} +} + +func (_c *CDCFactory_cdc_Call) Run(run func()) *CDCFactory_cdc_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CDCFactory_cdc_Call) Return() *CDCFactory_cdc_Call { + _c.Call.Return() + return _c +} + +func (_c *CDCFactory_cdc_Call) RunAndReturn(run func()) *CDCFactory_cdc_Call { + _c.Call.Return(run) + return _c +} + +// NewCDCFactory creates a new instance of CDCFactory. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewCDCFactory(t interface { + mock.TestingT + Cleanup(func()) +}) *CDCFactory { + mock := &CDCFactory{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/server/mocks/cdc_factory_mock.go b/server/mocks/cdc_factory_mock.go deleted file mode 100644 index 8342c699..00000000 --- a/server/mocks/cdc_factory_mock.go +++ /dev/null @@ -1,88 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. - -package mocks - -import ( - mock "github.com/stretchr/testify/mock" - reader "github.com/zilliztech/milvus-cdc/core/reader" - "github.com/zilliztech/milvus-cdc/core/util" - writer "github.com/zilliztech/milvus-cdc/core/writer" -) - -// CDCFactory is an autogenerated mock type for the CDCFactory type -type CDCFactory struct { - util.CDCMark - mock.Mock -} - -// NewReader provides a mock function with given fields: -func (_m *CDCFactory) NewReader() (reader.CDCReader, error) { - ret := _m.Called() - - var r0 reader.CDCReader - var r1 error - if rf, ok := ret.Get(0).(func() (reader.CDCReader, error)); ok { - return rf() - } - if rf, ok := ret.Get(0).(func() reader.CDCReader); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(reader.CDCReader) - } - } - - if rf, ok := ret.Get(1).(func() error); ok { - r1 = rf() - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// NewWriter provides a mock function with given fields: -func (_m *CDCFactory) NewWriter() (writer.CDCWriter, error) { - ret := _m.Called() - - var r0 writer.CDCWriter - var r1 error - if rf, ok := ret.Get(0).(func() (writer.CDCWriter, error)); ok { - return rf() - } - if rf, ok := ret.Get(0).(func() writer.CDCWriter); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(writer.CDCWriter) - } - } - - if rf, ok := ret.Get(1).(func() error); ok { - r1 = rf() - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// cdc provides a mock function with given fields: -func (_m *CDCFactory) cdc() { - _m.Called() -} - -type mockConstructorTestingTNewCDCFactory interface { - mock.TestingT - Cleanup(func()) -} - -// NewCDCFactory creates a new instance of CDCFactory. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewCDCFactory(t mockConstructorTestingTNewCDCFactory) *CDCFactory { - mock := &CDCFactory{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/server/mocks/cdc_service.go b/server/mocks/cdc_service.go new file mode 100644 index 00000000..521772b2 --- /dev/null +++ b/server/mocks/cdc_service.go @@ -0,0 +1,391 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. 
+ +package mocks + +import ( + mock "github.com/stretchr/testify/mock" + request "github.com/zilliztech/milvus-cdc/server/model/request" +) + +// CDCService is an autogenerated mock type for the CDCService type +type CDCService struct { + mock.Mock +} + +type CDCService_Expecter struct { + mock *mock.Mock +} + +func (_m *CDCService) EXPECT() *CDCService_Expecter { + return &CDCService_Expecter{mock: &_m.Mock} +} + +// Create provides a mock function with given fields: _a0 +func (_m *CDCService) Create(_a0 *request.CreateRequest) (*request.CreateResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.CreateResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.CreateRequest) (*request.CreateResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.CreateRequest) *request.CreateResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.CreateResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.CreateRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_Create_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Create' +type CDCService_Create_Call struct { + *mock.Call +} + +// Create is a helper method to define mock.On call +// - _a0 *request.CreateRequest +func (_e *CDCService_Expecter) Create(_a0 interface{}) *CDCService_Create_Call { + return &CDCService_Create_Call{Call: _e.mock.On("Create", _a0)} +} + +func (_c *CDCService_Create_Call) Run(run func(_a0 *request.CreateRequest)) *CDCService_Create_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.CreateRequest)) + }) + return _c +} + +func (_c *CDCService_Create_Call) Return(_a0 *request.CreateResponse, _a1 error) *CDCService_Create_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_Create_Call) RunAndReturn(run func(*request.CreateRequest) (*request.CreateResponse, error)) *CDCService_Create_Call { + _c.Call.Return(run) + return _c +} + +// Delete provides a mock function with given fields: _a0 +func (_m *CDCService) Delete(_a0 *request.DeleteRequest) (*request.DeleteResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.DeleteResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.DeleteRequest) (*request.DeleteResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.DeleteRequest) *request.DeleteResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.DeleteResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.DeleteRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_Delete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Delete' +type CDCService_Delete_Call struct { + *mock.Call +} + +// Delete is a helper method to define mock.On call +// - _a0 *request.DeleteRequest +func (_e *CDCService_Expecter) Delete(_a0 interface{}) *CDCService_Delete_Call { + return &CDCService_Delete_Call{Call: _e.mock.On("Delete", _a0)} +} + +func (_c *CDCService_Delete_Call) Run(run func(_a0 *request.DeleteRequest)) *CDCService_Delete_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.DeleteRequest)) + }) + return _c +} + +func (_c *CDCService_Delete_Call) Return(_a0 *request.DeleteResponse, _a1 error) *CDCService_Delete_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_Delete_Call) 
RunAndReturn(run func(*request.DeleteRequest) (*request.DeleteResponse, error)) *CDCService_Delete_Call { + _c.Call.Return(run) + return _c +} + +// Get provides a mock function with given fields: _a0 +func (_m *CDCService) Get(_a0 *request.GetRequest) (*request.GetResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.GetResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.GetRequest) (*request.GetResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.GetRequest) *request.GetResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.GetResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.GetRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_Get_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Get' +type CDCService_Get_Call struct { + *mock.Call +} + +// Get is a helper method to define mock.On call +// - _a0 *request.GetRequest +func (_e *CDCService_Expecter) Get(_a0 interface{}) *CDCService_Get_Call { + return &CDCService_Get_Call{Call: _e.mock.On("Get", _a0)} +} + +func (_c *CDCService_Get_Call) Run(run func(_a0 *request.GetRequest)) *CDCService_Get_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.GetRequest)) + }) + return _c +} + +func (_c *CDCService_Get_Call) Return(_a0 *request.GetResponse, _a1 error) *CDCService_Get_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_Get_Call) RunAndReturn(run func(*request.GetRequest) (*request.GetResponse, error)) *CDCService_Get_Call { + _c.Call.Return(run) + return _c +} + +// List provides a mock function with given fields: _a0 +func (_m *CDCService) List(_a0 *request.ListRequest) (*request.ListResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.ListResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.ListRequest) (*request.ListResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.ListRequest) *request.ListResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.ListResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.ListRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_List_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'List' +type CDCService_List_Call struct { + *mock.Call +} + +// List is a helper method to define mock.On call +// - _a0 *request.ListRequest +func (_e *CDCService_Expecter) List(_a0 interface{}) *CDCService_List_Call { + return &CDCService_List_Call{Call: _e.mock.On("List", _a0)} +} + +func (_c *CDCService_List_Call) Run(run func(_a0 *request.ListRequest)) *CDCService_List_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.ListRequest)) + }) + return _c +} + +func (_c *CDCService_List_Call) Return(_a0 *request.ListResponse, _a1 error) *CDCService_List_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_List_Call) RunAndReturn(run func(*request.ListRequest) (*request.ListResponse, error)) *CDCService_List_Call { + _c.Call.Return(run) + return _c +} + +// Pause provides a mock function with given fields: _a0 +func (_m *CDCService) Pause(_a0 *request.PauseRequest) (*request.PauseResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.PauseResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.PauseRequest) 
(*request.PauseResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.PauseRequest) *request.PauseResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.PauseResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.PauseRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_Pause_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Pause' +type CDCService_Pause_Call struct { + *mock.Call +} + +// Pause is a helper method to define mock.On call +// - _a0 *request.PauseRequest +func (_e *CDCService_Expecter) Pause(_a0 interface{}) *CDCService_Pause_Call { + return &CDCService_Pause_Call{Call: _e.mock.On("Pause", _a0)} +} + +func (_c *CDCService_Pause_Call) Run(run func(_a0 *request.PauseRequest)) *CDCService_Pause_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.PauseRequest)) + }) + return _c +} + +func (_c *CDCService_Pause_Call) Return(_a0 *request.PauseResponse, _a1 error) *CDCService_Pause_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_Pause_Call) RunAndReturn(run func(*request.PauseRequest) (*request.PauseResponse, error)) *CDCService_Pause_Call { + _c.Call.Return(run) + return _c +} + +// ReloadTask provides a mock function with given fields: +func (_m *CDCService) ReloadTask() { + _m.Called() +} + +// CDCService_ReloadTask_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ReloadTask' +type CDCService_ReloadTask_Call struct { + *mock.Call +} + +// ReloadTask is a helper method to define mock.On call +func (_e *CDCService_Expecter) ReloadTask() *CDCService_ReloadTask_Call { + return &CDCService_ReloadTask_Call{Call: _e.mock.On("ReloadTask")} +} + +func (_c *CDCService_ReloadTask_Call) Run(run func()) *CDCService_ReloadTask_Call { + _c.Call.Run(func(args mock.Arguments) { + run() + }) + return _c +} + +func (_c *CDCService_ReloadTask_Call) Return() *CDCService_ReloadTask_Call { + _c.Call.Return() + return _c +} + +func (_c *CDCService_ReloadTask_Call) RunAndReturn(run func()) *CDCService_ReloadTask_Call { + _c.Call.Return(run) + return _c +} + +// Resume provides a mock function with given fields: _a0 +func (_m *CDCService) Resume(_a0 *request.ResumeRequest) (*request.ResumeResponse, error) { + ret := _m.Called(_a0) + + var r0 *request.ResumeResponse + var r1 error + if rf, ok := ret.Get(0).(func(*request.ResumeRequest) (*request.ResumeResponse, error)); ok { + return rf(_a0) + } + if rf, ok := ret.Get(0).(func(*request.ResumeRequest) *request.ResumeResponse); ok { + r0 = rf(_a0) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*request.ResumeResponse) + } + } + + if rf, ok := ret.Get(1).(func(*request.ResumeRequest) error); ok { + r1 = rf(_a0) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// CDCService_Resume_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Resume' +type CDCService_Resume_Call struct { + *mock.Call +} + +// Resume is a helper method to define mock.On call +// - _a0 *request.ResumeRequest +func (_e *CDCService_Expecter) Resume(_a0 interface{}) *CDCService_Resume_Call { + return &CDCService_Resume_Call{Call: _e.mock.On("Resume", _a0)} +} + +func (_c *CDCService_Resume_Call) Run(run func(_a0 *request.ResumeRequest)) *CDCService_Resume_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(*request.ResumeRequest)) + }) + return _c +} + +func (_c 
*CDCService_Resume_Call) Return(_a0 *request.ResumeResponse, _a1 error) *CDCService_Resume_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *CDCService_Resume_Call) RunAndReturn(run func(*request.ResumeRequest) (*request.ResumeResponse, error)) *CDCService_Resume_Call { + _c.Call.Return(run) + return _c +} + +// NewCDCService creates a new instance of CDCService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewCDCService(t interface { + mock.TestingT + Cleanup(func()) +}) *CDCService { + mock := &CDCService{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/server/mocks/meta_store.go b/server/mocks/meta_store.go new file mode 100644 index 00000000..832c8fdb --- /dev/null +++ b/server/mocks/meta_store.go @@ -0,0 +1,180 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. + +package mocks + +import ( + context "context" + + mock "github.com/stretchr/testify/mock" +) + +// MetaStore is an autogenerated mock type for the MetaStore type +type MetaStore[M interface{}] struct { + mock.Mock +} + +type MetaStore_Expecter[M interface{}] struct { + mock *mock.Mock +} + +func (_m *MetaStore[M]) EXPECT() *MetaStore_Expecter[M] { + return &MetaStore_Expecter[M]{mock: &_m.Mock} +} + +// Delete provides a mock function with given fields: ctx, metaObj, txn +func (_m *MetaStore[M]) Delete(ctx context.Context, metaObj M, txn interface{}) error { + ret := _m.Called(ctx, metaObj, txn) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) error); ok { + r0 = rf(ctx, metaObj, txn) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MetaStore_Delete_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Delete' +type MetaStore_Delete_Call[M interface{}] struct { + *mock.Call +} + +// Delete is a helper method to define mock.On call +// - ctx context.Context +// - metaObj M +// - txn interface{} +func (_e *MetaStore_Expecter[M]) Delete(ctx interface{}, metaObj interface{}, txn interface{}) *MetaStore_Delete_Call[M] { + return &MetaStore_Delete_Call[M]{Call: _e.mock.On("Delete", ctx, metaObj, txn)} +} + +func (_c *MetaStore_Delete_Call[M]) Run(run func(ctx context.Context, metaObj M, txn interface{})) *MetaStore_Delete_Call[M] { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(M), args[2].(interface{})) + }) + return _c +} + +func (_c *MetaStore_Delete_Call[M]) Return(_a0 error) *MetaStore_Delete_Call[M] { + _c.Call.Return(_a0) + return _c +} + +func (_c *MetaStore_Delete_Call[M]) RunAndReturn(run func(context.Context, M, interface{}) error) *MetaStore_Delete_Call[M] { + _c.Call.Return(run) + return _c +} + +// Get provides a mock function with given fields: ctx, metaObj, txn +func (_m *MetaStore[M]) Get(ctx context.Context, metaObj M, txn interface{}) ([]M, error) { + ret := _m.Called(ctx, metaObj, txn) + + var r0 []M + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) ([]M, error)); ok { + return rf(ctx, metaObj, txn) + } + if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) []M); ok { + r0 = rf(ctx, metaObj, txn) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]M) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, M, interface{}) error); ok { + r1 = rf(ctx, metaObj, txn) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MetaStore_Get_Call is a 
*mock.Call that shadows Run/Return methods with type explicit version for method 'Get' +type MetaStore_Get_Call[M interface{}] struct { + *mock.Call +} + +// Get is a helper method to define mock.On call +// - ctx context.Context +// - metaObj M +// - txn interface{} +func (_e *MetaStore_Expecter[M]) Get(ctx interface{}, metaObj interface{}, txn interface{}) *MetaStore_Get_Call[M] { + return &MetaStore_Get_Call[M]{Call: _e.mock.On("Get", ctx, metaObj, txn)} +} + +func (_c *MetaStore_Get_Call[M]) Run(run func(ctx context.Context, metaObj M, txn interface{})) *MetaStore_Get_Call[M] { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(M), args[2].(interface{})) + }) + return _c +} + +func (_c *MetaStore_Get_Call[M]) Return(_a0 []M, _a1 error) *MetaStore_Get_Call[M] { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MetaStore_Get_Call[M]) RunAndReturn(run func(context.Context, M, interface{}) ([]M, error)) *MetaStore_Get_Call[M] { + _c.Call.Return(run) + return _c +} + +// Put provides a mock function with given fields: ctx, metaObj, txn +func (_m *MetaStore[M]) Put(ctx context.Context, metaObj M, txn interface{}) error { + ret := _m.Called(ctx, metaObj, txn) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) error); ok { + r0 = rf(ctx, metaObj, txn) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MetaStore_Put_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Put' +type MetaStore_Put_Call[M interface{}] struct { + *mock.Call +} + +// Put is a helper method to define mock.On call +// - ctx context.Context +// - metaObj M +// - txn interface{} +func (_e *MetaStore_Expecter[M]) Put(ctx interface{}, metaObj interface{}, txn interface{}) *MetaStore_Put_Call[M] { + return &MetaStore_Put_Call[M]{Call: _e.mock.On("Put", ctx, metaObj, txn)} +} + +func (_c *MetaStore_Put_Call[M]) Run(run func(ctx context.Context, metaObj M, txn interface{})) *MetaStore_Put_Call[M] { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(M), args[2].(interface{})) + }) + return _c +} + +func (_c *MetaStore_Put_Call[M]) Return(_a0 error) *MetaStore_Put_Call[M] { + _c.Call.Return(_a0) + return _c +} + +func (_c *MetaStore_Put_Call[M]) RunAndReturn(run func(context.Context, M, interface{}) error) *MetaStore_Put_Call[M] { + _c.Call.Return(run) + return _c +} + +// NewMetaStore creates a new instance of MetaStore. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewMetaStore[M interface{}](t interface { + mock.TestingT + Cleanup(func()) +}) *MetaStore[M] { + mock := &MetaStore[M]{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/server/mocks/meta_store_factory.go b/server/mocks/meta_store_factory.go new file mode 100644 index 00000000..b3e08e72 --- /dev/null +++ b/server/mocks/meta_store_factory.go @@ -0,0 +1,190 @@ +// Code generated by mockery v2.32.4. DO NOT EDIT. 
+ +package mocks + +import ( + context "context" + + mock "github.com/stretchr/testify/mock" + meta "github.com/zilliztech/milvus-cdc/server/model/meta" + + store "github.com/zilliztech/milvus-cdc/server/store" +) + +// MetaStoreFactory is an autogenerated mock type for the MetaStoreFactory type +type MetaStoreFactory struct { + mock.Mock +} + +type MetaStoreFactory_Expecter struct { + mock *mock.Mock +} + +func (_m *MetaStoreFactory) EXPECT() *MetaStoreFactory_Expecter { + return &MetaStoreFactory_Expecter{mock: &_m.Mock} +} + +// GetTaskCollectionPositionMetaStore provides a mock function with given fields: ctx +func (_m *MetaStoreFactory) GetTaskCollectionPositionMetaStore(ctx context.Context) store.MetaStore[*meta.TaskCollectionPosition] { + ret := _m.Called(ctx) + + var r0 store.MetaStore[*meta.TaskCollectionPosition] + if rf, ok := ret.Get(0).(func(context.Context) store.MetaStore[*meta.TaskCollectionPosition]); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(store.MetaStore[*meta.TaskCollectionPosition]) + } + } + + return r0 +} + +// MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTaskCollectionPositionMetaStore' +type MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call struct { + *mock.Call +} + +// GetTaskCollectionPositionMetaStore is a helper method to define mock.On call +// - ctx context.Context +func (_e *MetaStoreFactory_Expecter) GetTaskCollectionPositionMetaStore(ctx interface{}) *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call { + return &MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call{Call: _e.mock.On("GetTaskCollectionPositionMetaStore", ctx)} +} + +func (_c *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call) Run(run func(ctx context.Context)) *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call) Return(_a0 store.MetaStore[*meta.TaskCollectionPosition]) *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call) RunAndReturn(run func(context.Context) store.MetaStore[*meta.TaskCollectionPosition]) *MetaStoreFactory_GetTaskCollectionPositionMetaStore_Call { + _c.Call.Return(run) + return _c +} + +// GetTaskInfoMetaStore provides a mock function with given fields: ctx +func (_m *MetaStoreFactory) GetTaskInfoMetaStore(ctx context.Context) store.MetaStore[*meta.TaskInfo] { + ret := _m.Called(ctx) + + var r0 store.MetaStore[*meta.TaskInfo] + if rf, ok := ret.Get(0).(func(context.Context) store.MetaStore[*meta.TaskInfo]); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(store.MetaStore[*meta.TaskInfo]) + } + } + + return r0 +} + +// MetaStoreFactory_GetTaskInfoMetaStore_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTaskInfoMetaStore' +type MetaStoreFactory_GetTaskInfoMetaStore_Call struct { + *mock.Call +} + +// GetTaskInfoMetaStore is a helper method to define mock.On call +// - ctx context.Context +func (_e *MetaStoreFactory_Expecter) GetTaskInfoMetaStore(ctx interface{}) *MetaStoreFactory_GetTaskInfoMetaStore_Call { + return &MetaStoreFactory_GetTaskInfoMetaStore_Call{Call: _e.mock.On("GetTaskInfoMetaStore", ctx)} +} + +func (_c 
*MetaStoreFactory_GetTaskInfoMetaStore_Call) Run(run func(ctx context.Context)) *MetaStoreFactory_GetTaskInfoMetaStore_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MetaStoreFactory_GetTaskInfoMetaStore_Call) Return(_a0 store.MetaStore[*meta.TaskInfo]) *MetaStoreFactory_GetTaskInfoMetaStore_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MetaStoreFactory_GetTaskInfoMetaStore_Call) RunAndReturn(run func(context.Context) store.MetaStore[*meta.TaskInfo]) *MetaStoreFactory_GetTaskInfoMetaStore_Call { + _c.Call.Return(run) + return _c +} + +// Txn provides a mock function with given fields: ctx +func (_m *MetaStoreFactory) Txn(ctx context.Context) (interface{}, func(error) error, error) { + ret := _m.Called(ctx) + + var r0 interface{} + var r1 func(error) error + var r2 error + if rf, ok := ret.Get(0).(func(context.Context) (interface{}, func(error) error, error)); ok { + return rf(ctx) + } + if rf, ok := ret.Get(0).(func(context.Context) interface{}); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(interface{}) + } + } + + if rf, ok := ret.Get(1).(func(context.Context) func(error) error); ok { + r1 = rf(ctx) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(func(error) error) + } + } + + if rf, ok := ret.Get(2).(func(context.Context) error); ok { + r2 = rf(ctx) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// MetaStoreFactory_Txn_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Txn' +type MetaStoreFactory_Txn_Call struct { + *mock.Call +} + +// Txn is a helper method to define mock.On call +// - ctx context.Context +func (_e *MetaStoreFactory_Expecter) Txn(ctx interface{}) *MetaStoreFactory_Txn_Call { + return &MetaStoreFactory_Txn_Call{Call: _e.mock.On("Txn", ctx)} +} + +func (_c *MetaStoreFactory_Txn_Call) Run(run func(ctx context.Context)) *MetaStoreFactory_Txn_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MetaStoreFactory_Txn_Call) Return(_a0 interface{}, _a1 func(error) error, _a2 error) *MetaStoreFactory_Txn_Call { + _c.Call.Return(_a0, _a1, _a2) + return _c +} + +func (_c *MetaStoreFactory_Txn_Call) RunAndReturn(run func(context.Context) (interface{}, func(error) error, error)) *MetaStoreFactory_Txn_Call { + _c.Call.Return(run) + return _c +} + +// NewMetaStoreFactory creates a new instance of MetaStoreFactory. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewMetaStoreFactory(t interface { + mock.TestingT + Cleanup(func()) +}) *MetaStoreFactory { + mock := &MetaStoreFactory{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/server/mocks/meta_store_factory_mock.go b/server/mocks/meta_store_factory_mock.go deleted file mode 100644 index 7adf848a..00000000 --- a/server/mocks/meta_store_factory_mock.go +++ /dev/null @@ -1,99 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. 
- -package mocks - -import ( - context "context" - - mock "github.com/stretchr/testify/mock" - meta "github.com/zilliztech/milvus-cdc/server/model/meta" - - store "github.com/zilliztech/milvus-cdc/server/store" -) - -// MetaStoreFactory is an autogenerated mock type for the MetaStoreFactory type -type MetaStoreFactory struct { - mock.Mock -} - -// GetTaskCollectionPositionMetaStore provides a mock function with given fields: ctx -func (_m *MetaStoreFactory) GetTaskCollectionPositionMetaStore(ctx context.Context) store.MetaStore[*meta.TaskCollectionPosition] { - ret := _m.Called(ctx) - - var r0 store.MetaStore[*meta.TaskCollectionPosition] - if rf, ok := ret.Get(0).(func(context.Context) store.MetaStore[*meta.TaskCollectionPosition]); ok { - r0 = rf(ctx) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(store.MetaStore[*meta.TaskCollectionPosition]) - } - } - - return r0 -} - -// GetTaskInfoMetaStore provides a mock function with given fields: ctx -func (_m *MetaStoreFactory) GetTaskInfoMetaStore(ctx context.Context) store.MetaStore[*meta.TaskInfo] { - ret := _m.Called(ctx) - - var r0 store.MetaStore[*meta.TaskInfo] - if rf, ok := ret.Get(0).(func(context.Context) store.MetaStore[*meta.TaskInfo]); ok { - r0 = rf(ctx) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(store.MetaStore[*meta.TaskInfo]) - } - } - - return r0 -} - -// Txn provides a mock function with given fields: ctx -func (_m *MetaStoreFactory) Txn(ctx context.Context) (interface{}, func(error) error, error) { - ret := _m.Called(ctx) - - var r0 interface{} - var r1 func(error) error - var r2 error - if rf, ok := ret.Get(0).(func(context.Context) (interface{}, func(error) error, error)); ok { - return rf(ctx) - } - if rf, ok := ret.Get(0).(func(context.Context) interface{}); ok { - r0 = rf(ctx) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(interface{}) - } - } - - if rf, ok := ret.Get(1).(func(context.Context) func(error) error); ok { - r1 = rf(ctx) - } else { - if ret.Get(1) != nil { - r1 = ret.Get(1).(func(error) error) - } - } - - if rf, ok := ret.Get(2).(func(context.Context) error); ok { - r2 = rf(ctx) - } else { - r2 = ret.Error(2) - } - - return r0, r1, r2 -} - -type mockConstructorTestingTNewMetaStoreFactory interface { - mock.TestingT - Cleanup(func()) -} - -// NewMetaStoreFactory creates a new instance of MetaStoreFactory. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewMetaStoreFactory(t mockConstructorTestingTNewMetaStoreFactory) *MetaStoreFactory { - mock := &MetaStoreFactory{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/server/mocks/meta_store_mock.go b/server/mocks/meta_store_mock.go deleted file mode 100644 index 36f87ea8..00000000 --- a/server/mocks/meta_store_mock.go +++ /dev/null @@ -1,83 +0,0 @@ -// Code generated by mockery v2.20.0. DO NOT EDIT. 
- -package mocks - -import ( - context "context" - - mock "github.com/stretchr/testify/mock" -) - -// MetaStore is an autogenerated mock type for the MetaStore type -type MetaStore[M interface{}] struct { - mock.Mock -} - -// Delete provides a mock function with given fields: ctx, metaObj, txn -func (_m *MetaStore[M]) Delete(ctx context.Context, metaObj M, txn interface{}) error { - ret := _m.Called(ctx, metaObj, txn) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) error); ok { - r0 = rf(ctx, metaObj, txn) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Get provides a mock function with given fields: ctx, metaObj, txn -func (_m *MetaStore[M]) Get(ctx context.Context, metaObj M, txn interface{}) ([]M, error) { - ret := _m.Called(ctx, metaObj, txn) - - var r0 []M - var r1 error - if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) ([]M, error)); ok { - return rf(ctx, metaObj, txn) - } - if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) []M); ok { - r0 = rf(ctx, metaObj, txn) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]M) - } - } - - if rf, ok := ret.Get(1).(func(context.Context, M, interface{}) error); ok { - r1 = rf(ctx, metaObj, txn) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Put provides a mock function with given fields: ctx, metaObj, txn -func (_m *MetaStore[M]) Put(ctx context.Context, metaObj M, txn interface{}) error { - ret := _m.Called(ctx, metaObj, txn) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, M, interface{}) error); ok { - r0 = rf(ctx, metaObj, txn) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -type mockConstructorTestingTNewMetaStore interface { - mock.TestingT - Cleanup(func()) -} - -// NewMetaStore creates a new instance of MetaStore. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
-func NewMetaStore[M interface{}](t mockConstructorTestingTNewMetaStore) *MetaStore[M] { - mock := &MetaStore[M]{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/server/model/meta/task.go b/server/model/meta/task.go index 5fcebd88..1bc63ce5 100644 --- a/server/model/meta/task.go +++ b/server/model/meta/task.go @@ -20,6 +20,7 @@ import ( "fmt" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/zilliztech/milvus-cdc/server/model" ) diff --git a/server/model/meta/task_test.go b/server/model/meta/task_test.go index 75ac656f..1d52eec7 100644 --- a/server/model/meta/task_test.go +++ b/server/model/meta/task_test.go @@ -20,6 +20,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/zilliztech/milvus-cdc/server/model" ) diff --git a/server/monitor.go b/server/monitor.go index 77f842c0..97c90640 100644 --- a/server/monitor.go +++ b/server/monitor.go @@ -17,9 +17,11 @@ package server import ( + "github.com/milvus-io/milvus/pkg/log" + "go.uber.org/zap" + "github.com/zilliztech/milvus-cdc/core/reader" "github.com/zilliztech/milvus-cdc/server/metrics" - "go.uber.org/zap" ) type ReaderMonitor struct { @@ -31,7 +33,7 @@ type ReaderMonitor struct { func NewReaderMonitor(taskID string) *ReaderMonitor { return &ReaderMonitor{ taskID: taskID, - log: log.With(zap.String("task_id", taskID)), + log: log.With(zap.String("task_id", taskID)).Logger, } } diff --git a/server/server.go b/server/server.go index f8753225..7e651f28 100644 --- a/server/server.go +++ b/server/server.go @@ -24,16 +24,18 @@ import ( "net/http" "time" + "github.com/milvus-io/milvus/pkg/log" "github.com/mitchellh/mapstructure" "github.com/samber/lo" + "go.uber.org/zap" + cdcerror "github.com/zilliztech/milvus-cdc/server/error" "github.com/zilliztech/milvus-cdc/server/metrics" modelrequest "github.com/zilliztech/milvus-cdc/server/model/request" - "go.uber.org/zap" ) type CDCServer struct { - api CDCApi + api CDCService serverConfig *CDCServerConfig } @@ -44,7 +46,7 @@ func (c *CDCServer) Run(config *CDCServerConfig) { c.api = GetCDCApi(c.serverConfig) c.api.ReloadTask() cdcHandler := c.getCDCHandler() - //{ + // { // channelReader, err := reader.NewChannelReader( // coreconfig.MilvusMQConfig{Pulsar: c.serverConfig.SourceConfig.Pulsar, Kafka: c.serverConfig.SourceConfig.Kafka}, // "by-dev-rpc-request", @@ -68,7 +70,7 @@ func (c *CDCServer) Run(config *CDCServerConfig) { // } // }() // } - //} + // } http.Handle("/cdc", cdcHandler) log.Info("start server...") err := http.ListenAndServe(c.serverConfig.Address, nil) diff --git a/server/server_test.go b/server/server_test.go index f4715be9..88f3a568 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" + cdcerror "github.com/zilliztech/milvus-cdc/server/error" "github.com/zilliztech/milvus-cdc/server/model/request" ) diff --git a/server/store/etcd.go b/server/store/etcd.go index 03f23bfa..6ba5c469 100644 --- a/server/store/etcd.go +++ b/server/store/etcd.go @@ -2,13 +2,16 @@ package store import ( "context" + "time" + "github.com/cockroachdb/errors" "github.com/goccy/go-json" - "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/server/model/meta" + "github.com/milvus-io/milvus/pkg/log" clientv3 "go.etcd.io/etcd/client/v3" "go.uber.org/zap" - "time" + + "github.com/zilliztech/milvus-cdc/core/util" + "github.com/zilliztech/milvus-cdc/server/model/meta" ) var ( @@ -27,7 
+30,7 @@ type EtcdMetaStore struct { var _ MetaStoreFactory = &EtcdMetaStore{} func NewEtcdMetaStore(ctx context.Context, endpoints []string, rootPath string) (*EtcdMetaStore, error) { - log := util.Log.With(zap.Strings("endpoints", endpoints)) + log := log.With(zap.Strings("endpoints", endpoints)).Logger etcdClient, err := clientv3.New(clientv3.Config{ Endpoints: endpoints, DialTimeout: 5 * time.Second, @@ -95,7 +98,7 @@ func NewTaskInfoEtcdStore(ctx context.Context, etcdClient *clientv3.Client, root etcdClient: etcdClient, txnMap: txnMap, } - t.log = util.Log.With(zap.String("meta_store", "etcd"), zap.String("table", "task_info"), zap.String("root_path", rootPath)) + t.log = log.With(zap.String("meta_store", "etcd"), zap.String("table", "task_info"), zap.String("root_path", rootPath)).Logger err := EtcdStatus(ctx, etcdClient) if err != nil { t.log.Warn("unavailable etcd server, please check it", zap.Error(err)) @@ -208,7 +211,7 @@ func NewTaskCollectionPositionEtcdStore(ctx context.Context, etcdClient *clientv etcdClient: etcdClient, txnMap: txnMap, } - t.log = util.Log.With(zap.String("meta_store", "etcd"), zap.String("table", "task_collection_position"), zap.String("root_path", rootPath)) + t.log = log.With(zap.String("meta_store", "etcd"), zap.String("table", "task_collection_position"), zap.String("root_path", rootPath)).Logger err := EtcdStatus(ctx, etcdClient) if err != nil { t.log.Warn("unavailable etcd server, please check it", zap.Error(err)) diff --git a/server/store/meta_op.go b/server/store/meta_op.go index e5a2e3d1..abc71683 100644 --- a/server/store/meta_op.go +++ b/server/store/meta_op.go @@ -21,11 +21,13 @@ import ( "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/pkg/log" "github.com/samber/lo" + "go.uber.org/zap" + servererror "github.com/zilliztech/milvus-cdc/server/error" "github.com/zilliztech/milvus-cdc/server/metrics" "github.com/zilliztech/milvus-cdc/server/model/meta" - "go.uber.org/zap" ) func GetTaskInfo(taskInfoStore MetaStore[*meta.TaskInfo], taskID string) (*meta.TaskInfo, error) { diff --git a/server/store/meta_store.go b/server/store/meta_store.go index ccf99be1..7e120a07 100644 --- a/server/store/meta_store.go +++ b/server/store/meta_store.go @@ -3,7 +3,6 @@ package store import ( "context" - "github.com/zilliztech/milvus-cdc/core/util" "github.com/zilliztech/milvus-cdc/server/model/meta" ) @@ -22,5 +21,3 @@ type MetaStoreFactory interface { // Txn return commit function and error Txn(ctx context.Context) (any, func(err error) error, error) } - -var log = util.Log diff --git a/server/store/meta_store_test.go b/server/store/meta_store_test.go index e35d32f6..cc930d2c 100644 --- a/server/store/meta_store_test.go +++ b/server/store/meta_store_test.go @@ -7,11 +7,12 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/stretchr/testify/assert" + clientv3 "go.etcd.io/etcd/client/v3" + "go.uber.org/zap" + "github.com/zilliztech/milvus-cdc/core/util" "github.com/zilliztech/milvus-cdc/server/model" "github.com/zilliztech/milvus-cdc/server/model/meta" - clientv3 "go.etcd.io/etcd/client/v3" - "go.uber.org/zap" ) func TestTxnMap(t *testing.T) { diff --git a/server/store/mysql.go b/server/store/mysql.go index e36266fc..eeb40efa 100644 --- a/server/store/mysql.go +++ b/server/store/mysql.go @@ -10,9 +10,11 @@ import ( _ "github.com/go-sql-driver/mysql" "github.com/goccy/go-json" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" + "github.com/milvus-io/milvus/pkg/log" + 
"go.uber.org/zap" + "github.com/zilliztech/milvus-cdc/core/util" "github.com/zilliztech/milvus-cdc/server/model/meta" - "go.uber.org/zap" ) type MySQLMetaStore struct { @@ -34,7 +36,7 @@ func NewMySQLMetaStore(ctx context.Context, dataSourceName string, rootPath stri } func (s *MySQLMetaStore) init(ctx context.Context, dataSourceName string, rootPath string) error { - s.log = util.Log.With(zap.String("meta_store", "mysql")) + s.log = log.With(zap.String("meta_store", "mysql")).Logger db, err := sql.Open("mysql", dataSourceName) if err != nil { s.log.Warn("fail to open mysql", zap.Error(err)) @@ -119,7 +121,7 @@ func NewTaskInfoMysqlStore(ctx context.Context, db *sql.DB, rootPath string, txn } func (m *TaskInfoMysqlStore) init(ctx context.Context, db *sql.DB, rootPath string) error { - m.log = util.Log.With(zap.String("meta_store", "mysql"), zap.String("table", "task_info"), zap.String("root_path", rootPath)) + m.log = log.With(zap.String("meta_store", "mysql"), zap.String("table", "task_info"), zap.String("root_path", rootPath)).Logger _, err := db.ExecContext(ctx, ` CREATE TABLE IF NOT EXISTS task_info ( task_info_key VARCHAR(255) NOT NULL, @@ -275,7 +277,7 @@ func NewTaskCollectionPositionMysqlStore(ctx context.Context, db *sql.DB, rootPa } func (m *TaskCollectionPositionMysqlStore) init(ctx context.Context, db *sql.DB, rootPath string) error { - m.log = util.Log.With(zap.String("meta_store", "mysql"), zap.String("table", "task_position"), zap.String("root_path", rootPath)) + m.log = log.With(zap.String("meta_store", "mysql"), zap.String("table", "task_position"), zap.String("root_path", rootPath)).Logger _, err := db.ExecContext(ctx, ` CREATE TABLE IF NOT EXISTS task_position ( task_position_key VARCHAR(255) NOT NULL, diff --git a/server/var.go b/server/var.go index ee372492..4c782792 100644 --- a/server/var.go +++ b/server/var.go @@ -16,10 +16,10 @@ package server -import ( - "github.com/zilliztech/milvus-cdc/core/util" -) - -var ( - log = util.Log -) +// import ( +// "github.com/zilliztech/milvus-cdc/core/util" +// ) +// +// var ( +// log = util.Log +// ) diff --git a/server/writer_callback.go b/server/writer_callback.go index 9c94d415..c4ca7ccc 100644 --- a/server/writer_callback.go +++ b/server/writer_callback.go @@ -18,19 +18,18 @@ package server import ( "context" - "strconv" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" - "github.com/zilliztech/milvus-cdc/core/model" + "github.com/milvus-io/milvus/pkg/log" + "go.uber.org/zap" + "github.com/zilliztech/milvus-cdc/core/util" - "github.com/zilliztech/milvus-cdc/core/writer" "github.com/zilliztech/milvus-cdc/server/metrics" "github.com/zilliztech/milvus-cdc/server/store" - "go.uber.org/zap" ) type WriteCallback struct { - writer.DefaultWriteCallBack + // writer.DefaultWriteCallBack metaStoreFactory store.MetaStoreFactory rootPath string @@ -43,37 +42,37 @@ func NewWriteCallback(factory store.MetaStoreFactory, rootPath string, taskID st metaStoreFactory: factory, rootPath: rootPath, taskID: taskID, - log: log.With(zap.String("task_id", taskID)), + log: log.With(zap.String("task_id", taskID)).Logger, } } -func (w *WriteCallback) OnFail(data *model.CDCData, err error) { - w.log.Warn("fail to write the msg", zap.String("data", util.Base64Encode(data)), zap.Error(err)) - metrics.WriterFailCountVec.WithLabelValues(w.taskID, metrics.WriteFailOnFail).Inc() - _ = store.UpdateTaskFailedReason(w.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), w.taskID, err.Error()) -} - -func (w *WriteCallback) OnSuccess(collectionID int64, 
channelInfos map[string]writer.CallbackChannelInfo) { - var msgType string - var count int - for channelName, info := range channelInfos { - if info.MsgType == commonpb.MsgType_Insert { - msgType = commonpb.MsgType_Insert.String() - } else if info.MsgType == commonpb.MsgType_Delete { - msgType = commonpb.MsgType_Delete.String() - } - count += info.MsgRowCount - sub := util.SubByNow(info.Ts) - metrics.WriterTimeDifferenceVec.WithLabelValues(w.taskID, strconv.FormatInt(collectionID, 10), channelName).Set(float64(sub)) - } - if msgType != "" { - metrics.WriteMsgRowCountVec.WithLabelValues(w.taskID, strconv.FormatInt(collectionID, 10), msgType).Add(float64(count)) - } - // means it's drop collection message - if len(channelInfos) > 1 { - metrics.StreamingCollectionCountVec.WithLabelValues(w.taskID, metrics.FinishStatusLabel).Inc() - } -} +// func (w *WriteCallback) OnFail(data *model.CDCData, err error) { +// w.log.Warn("fail to write the msg", zap.String("data", util.Base64Encode(data)), zap.Error(err)) +// metrics.WriterFailCountVec.WithLabelValues(w.taskID, metrics.WriteFailOnFail).Inc() +// _ = store.UpdateTaskFailedReason(w.metaStoreFactory.GetTaskInfoMetaStore(context.Background()), w.taskID, err.Error()) +// } +// +// func (w *WriteCallback) OnSuccess(collectionID int64, channelInfos map[string]writer.CallbackChannelInfo) { +// var msgType string +// var count int +// for channelName, info := range channelInfos { +// if info.MsgType == commonpb.MsgType_Insert { +// msgType = commonpb.MsgType_Insert.String() +// } else if info.MsgType == commonpb.MsgType_Delete { +// msgType = commonpb.MsgType_Delete.String() +// } +// count += info.MsgRowCount +// sub := util.SubByNow(info.Ts) +// metrics.WriterTimeDifferenceVec.WithLabelValues(w.taskID, strconv.FormatInt(collectionID, 10), channelName).Set(float64(sub)) +// } +// if msgType != "" { +// metrics.WriteMsgRowCountVec.WithLabelValues(w.taskID, strconv.FormatInt(collectionID, 10), msgType).Add(float64(count)) +// } +// // means it's drop collection message +// if len(channelInfos) > 1 { +// metrics.StreamingCollectionCountVec.WithLabelValues(w.taskID, metrics.FinishStatusLabel).Inc() +// } +// } func (w *WriteCallback) UpdateTaskCollectionPosition(collectionID int64, collectionName string, pChannelName string, position *commonpb.KeyDataPair) { if position == nil {