-
Notifications
You must be signed in to change notification settings - Fork 287
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
binlog: do not decode rows for block table #7622
Changes from 7 commits
eccd29a
d060d48
c5511e7
faa5d80
8d98b89
71ce943
aed1214
9a3e548
a52bbba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,11 +37,14 @@ import ( | |
"go.uber.org/zap" | ||
) | ||
|
||
// the time layout for TiDB SHOW DDL statements. | ||
const timeLayout = "2006-01-02 15:04:05" | ||
|
||
// everytime retrieve 10 new rows from TiDB history jobs. | ||
const linesOfRows = 10 | ||
const ( | ||
// the time layout for TiDB SHOW DDL statements. | ||
timeLayout = "2006-01-02 15:04:05" | ||
// everytime retrieve 10 new rows from TiDB history jobs. | ||
linesOfRows = 10 | ||
// max capacity of the block/allow list. | ||
maxCapacity = 100000 | ||
) | ||
|
||
// getTableByDML gets table from INSERT/UPDATE/DELETE statement. | ||
func getTableByDML(dml ast.DMLNode) (*filter.Table, error) { | ||
|
@@ -136,7 +139,7 @@ func str2TimezoneOrFromDB(tctx *tcontext.Context, tzStr string, dbCfg *config.DB | |
return loc, tzStr, nil | ||
} | ||
|
||
func subtaskCfg2BinlogSyncerCfg(cfg *config.SubTaskConfig, timezone *time.Location) (replication.BinlogSyncerConfig, error) { | ||
func subtaskCfg2BinlogSyncerCfg(cfg *config.SubTaskConfig, timezone *time.Location, baList *filter.Filter) (replication.BinlogSyncerConfig, error) { | ||
var tlsConfig *tls.Config | ||
var err error | ||
if cfg.From.Security != nil { | ||
|
@@ -153,6 +156,44 @@ func subtaskCfg2BinlogSyncerCfg(cfg *config.SubTaskConfig, timezone *time.Locati | |
} | ||
} | ||
|
||
var rowsEventDecodeFunc func(*replication.RowsEvent, []byte) error | ||
if baList != nil { | ||
// we don't track delete table events, so simply reset the cache if it's full | ||
// TODO: use LRU or CLOCK cache if needed. | ||
allowListCache := make(map[uint64]struct{}, maxCapacity) | ||
blockListCache := make(map[uint64]struct{}, maxCapacity) | ||
|
||
rowsEventDecodeFunc = func(re *replication.RowsEvent, data []byte) error { | ||
pos, err := re.DecodeHeader(data) | ||
if err != nil { | ||
return err | ||
} | ||
if _, ok := blockListCache[re.TableID]; ok { | ||
return nil | ||
} else if _, ok := allowListCache[re.TableID]; ok { | ||
return re.DecodeData(pos, data) | ||
} | ||
|
||
tb := &filter.Table{ | ||
Schema: string(re.Table.Schema), | ||
Name: string(re.Table.Table), | ||
} | ||
if skipByTable(baList, tb) { | ||
if len(blockListCache) >= maxCapacity { | ||
blockListCache = make(map[uint64]struct{}, maxCapacity) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will prefer to evict one record rather than reset the whole cache. Maybe an LRU or CLOCK cache is more appropriate. But it might be rare to run out of capacity, so it's okay. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a TODO c5511e7 |
||
} | ||
blockListCache[re.TableID] = struct{}{} | ||
return nil | ||
} | ||
|
||
if len(allowListCache) >= maxCapacity { | ||
allowListCache = make(map[uint64]struct{}, maxCapacity) | ||
} | ||
allowListCache[re.TableID] = struct{}{} | ||
return re.DecodeData(pos, data) | ||
} | ||
} | ||
|
||
syncCfg := replication.BinlogSyncerConfig{ | ||
ServerID: cfg.ServerID, | ||
Flavor: cfg.Flavor, | ||
|
@@ -162,6 +203,7 @@ func subtaskCfg2BinlogSyncerCfg(cfg *config.SubTaskConfig, timezone *time.Locati | |
Password: cfg.From.Password, | ||
TimestampStringLocation: timezone, | ||
TLSConfig: tlsConfig, | ||
RowsEventDecodeFunc: rowsEventDecodeFunc, | ||
} | ||
// when retry count > 1, go-mysql will retry sync from the previous GTID set in GTID mode, | ||
// which may get duplicate binlog event after retry success. so just set retry count = 1, and task | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# diff Configuration. | ||
|
||
check-thread-count = 4 | ||
|
||
export-fix-sql = true | ||
|
||
check-struct-only = false | ||
|
||
[task] | ||
output-dir = "/tmp/ticdc_dm_test/output" | ||
|
||
source-instances = ["mysql1"] | ||
|
||
target-instance = "tidb0" | ||
|
||
target-check-tables = ["binlog_parse.t1"] | ||
|
||
[data-sources] | ||
[data-sources.mysql1] | ||
host = "127.0.0.1" | ||
port = 3306 | ||
user = "root" | ||
password = "123456" | ||
|
||
[data-sources.tidb0] | ||
host = "127.0.0.1" | ||
port = 4000 | ||
user = "test" | ||
password = "123456" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Master Configuration. | ||
master-addr = ":8261" | ||
advertise-addr = "127.0.0.1:8261" | ||
auto-compaction-retention = "3s" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
--- | ||
name: test | ||
task-mode: all | ||
is-sharding: false | ||
meta-schema: "dm_meta" | ||
|
||
target-database: | ||
host: "127.0.0.1" | ||
port: 4000 | ||
user: "test" | ||
password: "/Q7B9DizNLLTTfiZHv9WoEAKamfpIUs=" | ||
|
||
mysql-instances: | ||
- source-id: "mysql-replica-01" | ||
block-allow-list: "instance" | ||
mydumper-config-name: "global" | ||
loader-config-name: "global" | ||
syncer-config-name: "global" | ||
|
||
block-allow-list: | ||
instance: | ||
do-dbs: ["binlog_parse"] | ||
do-tables: | ||
- db-name: "binlog_parse" | ||
tbl-name: "t1" | ||
|
||
mydumpers: | ||
global: | ||
threads: 4 | ||
chunk-filesize: 64 | ||
skip-tz-utc: true | ||
extra-args: "" | ||
|
||
loaders: | ||
global: | ||
pool-size: 16 | ||
dir: "./dumped_data" | ||
|
||
syncers: | ||
global: | ||
worker-count: 16 | ||
batch: 100 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
name = "worker1" | ||
join = "127.0.0.1:8261" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
source-id: mysql-replica-01 | ||
enable-gtid: true | ||
relay-binlog-name: '' | ||
relay-binlog-gtid: '' | ||
enable-relay: false | ||
from: | ||
host: 127.0.0.1 | ||
user: root | ||
password: /Q7B9DizNLLTTfiZHv9WoEAKamfpIUs= | ||
port: 3306 | ||
checker: | ||
check-enable: false |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
use binlog_parse; | ||
insert into t1 (id, created_time) values (3, '2022-01-03 00:00:01'), (4, '2022-01-04 00:00:01'); | ||
insert into t2 (id, created_time) values (3, '2022-01-03 00:00:01'), (4, '2022-01-04 00:00:01'); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
use binlog_parse; | ||
insert into t1 (id, created_time) values (5, '2022-01-05 00:00:01'), (6, '2022-01-06 00:00:01'); | ||
insert into t2 (id, created_time) values (5, '2022-01-05 00:00:01'), (6, '2022-01-06 00:00:01'); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
drop database if exists `binlog_parse`; | ||
create database `binlog_parse` character set utf8; | ||
use `binlog_parse`; | ||
create table t1 (id int, created_time timestamp, primary key(`id`)) character set utf8; | ||
create table t2 (id int, created_time timestamp(3), primary key(`id`)) character set utf8; | ||
insert into t1 (id, created_time) values (1, '2022-01-01 00:00:01'), (2, '2022-01-02 00:00:01'); | ||
insert into t2 (id, created_time) values (1, '2022-01-01 00:00:01'), (2, '2022-01-02 00:00:01'); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/bin/bash | ||
|
||
set -eu | ||
|
||
cur=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) | ||
source $cur/../_utils/test_prepare | ||
WORK_DIR=$TEST_DIR/$TEST_NAME | ||
|
||
# skip one tale, sync another table | ||
# mariadb10.0 timestamp(3) will panic before dm v6.4.0 | ||
function run() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would you leave a comment to illustrate what this test intends to do? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you mean this used to manually test MariaDB? I remember we don't have MariaDB in CI There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, v10.0 |
||
run_dm_master $WORK_DIR/master $MASTER_PORT $cur/conf/dm-master.toml | ||
check_rpc_alive $cur/../bin/check_master_online 127.0.0.1:$MASTER_PORT | ||
run_dm_worker $WORK_DIR/worker1 $WORKER1_PORT $cur/conf/dm-worker1.toml | ||
check_rpc_alive $cur/../bin/check_worker_online 127.0.0.1:$WORKER1_PORT | ||
|
||
# operate mysql config to worker | ||
cp $cur/conf/source1.yaml $WORK_DIR/source1.yaml | ||
dmctl_operate_source create $WORK_DIR/source1.yaml $SOURCE_ID1 | ||
|
||
echo "prepare data" | ||
run_sql_file $cur/data/db1.prepare.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 | ||
|
||
echo "start task" | ||
run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ | ||
"start-task $cur/conf/dm-task.yaml --remove-meta" | ||
|
||
echo "check full phase" | ||
check_sync_diff $WORK_DIR $cur/conf/diff_config.toml 30 | ||
|
||
run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ | ||
"validation start test" \ | ||
"\"result\": true" 1 | ||
|
||
echo "prepare incremental data" | ||
run_sql_file $cur/data/db1.increment.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 | ||
|
||
echo "check incremental phase" | ||
check_sync_diff $WORK_DIR $cur/conf/diff_config.toml 30 | ||
|
||
run_sql_tidb_with_retry "select count(1) from binlog_parse.t1;" "count(1): 4" | ||
|
||
# relay error in mariadb:10.0, success in mysql | ||
run_dm_ctl_with_retry $WORK_DIR "127.0.0.1:$MASTER_PORT" \ | ||
"start-relay -s $SOURCE_ID1 worker1" \ | ||
"\"result\": true" 2 \ | ||
"\"source\": \"$SOURCE_ID1\"" 1 \ | ||
"\"worker\": \"worker1\"" 1 | ||
# "TCPReader get relay event with error" 1 | ||
|
||
echo "prepare incremental data 2" | ||
run_sql_file $cur/data/db1.increment1.sql $MYSQL_HOST1 $MYSQL_PORT1 $MYSQL_PASSWORD1 | ||
run_dm_ctl $WORK_DIR "127.0.0.1:$MASTER_PORT" \ | ||
"validation start test" \ | ||
"\"result\": true" 1 | ||
|
||
echo "check incremental phase 2" | ||
check_sync_diff $WORK_DIR $cur/conf/diff_config.toml 30 | ||
} | ||
|
||
cleanup_data $TEST_NAME | ||
# also cleanup dm processes in case of last run failed | ||
cleanup_process $* | ||
run $* | ||
cleanup_process $* | ||
|
||
echo "[$(date)] <<<<<< test case $TEST_NAME success! >>>>>>" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ sequence_sharding_optimistic | |
sequence_sharding_removemeta | ||
gtid | ||
only_dml | ||
binlog_parse | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. others_integration_1.txt is the slowest stage in CI! please move it to other_2 or 3 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to abandon these blocked events while using a remote binlog reader?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
c.syncCfg is created by
tiflow/dm/syncer/util.go
Line 142 in d060d48