-
Notifications
You must be signed in to change notification settings - Fork 720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
mcs: fix rule sync when meet "no rule left" and concurrency #7481
Changes from 2 commits
7866780
10af033
9e006d3
d785df3
23d6032
bd5acaa
48afd51
e2a79c6
ac72124
9f72fe8
8b020e9
0c70123
89ff1fe
ff23490
4702d2a
13e49f8
520fbcb
4087916
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
"github.com/tikv/pd/pkg/schedule/placement" | ||
"github.com/tikv/pd/pkg/storage/endpoint" | ||
"github.com/tikv/pd/pkg/utils/etcdutil" | ||
"github.com/tikv/pd/pkg/utils/syncutil" | ||
"go.etcd.io/etcd/clientv3" | ||
"go.etcd.io/etcd/mvcc/mvccpb" | ||
"go.uber.org/zap" | ||
|
@@ -62,6 +63,17 @@ | |
ruleWatcher *etcdutil.LoopWatcher | ||
rleungx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
groupWatcher *etcdutil.LoopWatcher | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am still worried that the two watchers may receive events out of order and overwrite the same rule. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will add a test for it. If necessary, I will merge these watchers. |
||
labelWatcher *etcdutil.LoopWatcher | ||
|
||
// pendingDeletion is a structure used to track the rules or rule groups that are marked for deletion. | ||
// If a rule or rule group cannot be deleted immediately due to the absence of rules, | ||
// it will be held here and removed later when a new rule or rule group put event allows for its deletion. | ||
pendingDeletion struct { | ||
syncutil.RWMutex | ||
// key: path, value: [groupID, ruleID] | ||
// The map 'kvs' holds the rules or rule groups that are pending deletion. | ||
// If a rule group needs to be deleted, the ruleID will be an empty string. | ||
kvs map[string][2]string | ||
} | ||
} | ||
|
||
// NewWatcher creates a new watcher to watch the Placement Rule change from PD API server. | ||
|
@@ -86,6 +98,12 @@ | |
checkerController: checkerController, | ||
ruleManager: ruleManager, | ||
regionLabeler: regionLabeler, | ||
pendingDeletion: struct { | ||
syncutil.RWMutex | ||
kvs map[string][2]string | ||
}{ | ||
kvs: make(map[string][2]string), | ||
}, | ||
} | ||
err := rw.initializeRuleWatcher() | ||
if err != nil { | ||
|
@@ -115,7 +133,11 @@ | |
if oldRule := rw.ruleManager.GetRule(rule.GroupID, rule.ID); oldRule != nil { | ||
rw.checkerController.AddSuspectKeyRange(oldRule.StartKey, oldRule.EndKey) | ||
} | ||
return rw.ruleManager.SetRule(rule) | ||
err = rw.ruleManager.SetRule(rule) | ||
if err == nil && rw.hasPendingDeletion() { | ||
rw.tryFinishPendingDeletion() | ||
} | ||
return err | ||
} | ||
deleteFn := func(kv *mvccpb.KeyValue) error { | ||
key := string(kv.Key) | ||
|
@@ -129,7 +151,11 @@ | |
return err | ||
} | ||
rw.checkerController.AddSuspectKeyRange(rule.StartKey, rule.EndKey) | ||
return rw.ruleManager.DeleteRule(rule.GroupID, rule.ID) | ||
err = rw.ruleManager.DeleteRule(rule.GroupID, rule.ID) | ||
if err != nil && strings.Contains(err.Error(), "no rule left") { | ||
rw.addPendingDeletion(key, rule.GroupID, rule.ID) | ||
} | ||
return err | ||
} | ||
postEventFn := func() error { | ||
return nil | ||
|
@@ -157,7 +183,11 @@ | |
for _, rule := range rw.ruleManager.GetRulesByGroup(ruleGroup.ID) { | ||
rw.checkerController.AddSuspectKeyRange(rule.StartKey, rule.EndKey) | ||
} | ||
return rw.ruleManager.SetRuleGroup(ruleGroup) | ||
err = rw.ruleManager.SetRuleGroup(ruleGroup) | ||
if err == nil && rw.hasPendingDeletion() { | ||
rw.tryFinishPendingDeletion() | ||
} | ||
return err | ||
} | ||
deleteFn := func(kv *mvccpb.KeyValue) error { | ||
key := string(kv.Key) | ||
|
@@ -166,7 +196,11 @@ | |
for _, rule := range rw.ruleManager.GetRulesByGroup(trimmedKey) { | ||
rw.checkerController.AddSuspectKeyRange(rule.StartKey, rule.EndKey) | ||
} | ||
return rw.ruleManager.DeleteRuleGroup(trimmedKey) | ||
err := rw.ruleManager.DeleteRuleGroup(trimmedKey) | ||
if err != nil && strings.Contains(err.Error(), "no rule left") { | ||
rw.addPendingDeletion(key, trimmedKey, "") | ||
} | ||
return err | ||
} | ||
postEventFn := func() error { | ||
return nil | ||
|
@@ -216,3 +250,40 @@ | |
rw.cancel() | ||
rw.wg.Wait() | ||
} | ||
|
||
func (rw *Watcher) hasPendingDeletion() bool { | ||
rw.pendingDeletion.RLock() | ||
defer rw.pendingDeletion.RUnlock() | ||
return len(rw.pendingDeletion.kvs) > 0 | ||
} | ||
|
||
func (rw *Watcher) addPendingDeletion(path, groupID, ruleID string) { | ||
rw.pendingDeletion.Lock() | ||
defer rw.pendingDeletion.Unlock() | ||
rw.pendingDeletion.kvs[path] = [2]string{groupID, ruleID} | ||
} | ||
|
||
func (rw *Watcher) tryFinishPendingDeletion() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am worried about if the put and delete can be disordered. If so, the newly added rule might be deleted unexpectedly. |
||
rw.pendingDeletion.Lock() | ||
defer rw.pendingDeletion.Unlock() | ||
originLen := len(rw.pendingDeletion.kvs) | ||
for k, v := range rw.pendingDeletion.kvs { | ||
groupID, ruleID := v[0], v[1] | ||
var err error | ||
if ruleID == "" { | ||
err = rw.ruleManager.DeleteRuleGroup(groupID) | ||
} else { | ||
err = rw.ruleManager.DeleteRule(groupID, ruleID) | ||
} | ||
if err == nil { | ||
delete(rw.pendingDeletion.kvs, k) | ||
} | ||
} | ||
// If the length of the map is changed, it means that some rules or rule groups have been deleted. | ||
// We need to force load the rules and rule groups to make sure sync with etcd. | ||
if len(rw.pendingDeletion.kvs) != originLen { | ||
rw.ruleWatcher.ForceLoad() | ||
rw.groupWatcher.ForceLoad() | ||
log.Info("force load rules", zap.Int("pending deletion", len(rw.pendingDeletion.kvs)), zap.Int("origin", originLen)) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason for this modification? Do we need to modify the corresponding API interface for the non-API mode?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It avoids failures in JSON unmarshalling and keeps the behavior consistent with PD mode.