-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
cockroachdb: add high-availability support #12965
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:improvement | ||
cockroachdb: add high-availability support | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
package cockroachdb | ||
|
||
import ( | ||
"database/sql" | ||
"fmt" | ||
"sync" | ||
"time" | ||
|
||
"github.com/hashicorp/go-uuid" | ||
"github.com/hashicorp/vault/sdk/physical" | ||
) | ||
|
||
const ( | ||
// The lock TTL matches the default that Consul API uses, 15 seconds. | ||
// Used as part of SQL commands to set/extend lock expiry time relative to | ||
// database clock. | ||
CockroachDBLockTTLSeconds = 15 | ||
|
||
// The amount of time to wait between the lock renewals | ||
CockroachDBLockRenewInterval = 5 * time.Second | ||
|
||
// CockroachDBLockRetryInterval is the amount of time to wait | ||
// if a lock fails before trying again. | ||
CockroachDBLockRetryInterval = time.Second | ||
) | ||
|
||
// Verify backend satisfies the correct interfaces. | ||
var ( | ||
_ physical.HABackend = (*CockroachDBBackend)(nil) | ||
_ physical.Lock = (*CockroachDBLock)(nil) | ||
) | ||
|
||
type CockroachDBLock struct { | ||
backend *CockroachDBBackend | ||
key string | ||
value string | ||
identity string | ||
lock sync.Mutex | ||
|
||
renewTicker *time.Ticker | ||
|
||
// ttlSeconds is how long a lock is valid for. | ||
ttlSeconds int | ||
|
||
// renewInterval is how much time to wait between lock renewals. must be << ttl. | ||
renewInterval time.Duration | ||
|
||
// retryInterval is how much time to wait between attempts to grab the lock. | ||
retryInterval time.Duration | ||
} | ||
|
||
func (c *CockroachDBBackend) HAEnabled() bool { | ||
return c.haEnabled | ||
} | ||
|
||
func (c *CockroachDBBackend) LockWith(key, value string) (physical.Lock, error) { | ||
identity, err := uuid.GenerateUUID() | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &CockroachDBLock{ | ||
backend: c, | ||
key: key, | ||
value: value, | ||
identity: identity, | ||
ttlSeconds: CockroachDBLockTTLSeconds, | ||
renewInterval: CockroachDBLockRenewInterval, | ||
retryInterval: CockroachDBLockRetryInterval, | ||
}, nil | ||
} | ||
|
||
// Lock tries to acquire the lock by repeatedly trying to create a record in the | ||
// CockroachDB table. It will block until either the stop channel is closed or | ||
// the lock could be acquired successfully. The returned channel will be closed | ||
// once the lock in the CockroachDB table cannot be renewed, either due to an | ||
// error speaking to CockroachDB or because someone else has taken it. | ||
func (l *CockroachDBLock) Lock(stopCh <-chan struct{}) (<-chan struct{}, error) { | ||
l.lock.Lock() | ||
defer l.lock.Unlock() | ||
|
||
var ( | ||
success = make(chan struct{}) | ||
errors = make(chan error, 1) | ||
leader = make(chan struct{}) | ||
) | ||
go l.tryToLock(stopCh, success, errors) | ||
|
||
select { | ||
case <-success: | ||
// After acquiring it successfully, we must renew the lock periodically. | ||
l.renewTicker = time.NewTicker(l.renewInterval) | ||
go l.periodicallyRenewLock(leader) | ||
case err := <-errors: | ||
return nil, err | ||
case <-stopCh: | ||
return nil, nil | ||
} | ||
|
||
return leader, nil | ||
} | ||
|
||
// Unlock releases the lock by deleting the lock record from the | ||
// CockroachDB table. | ||
func (l *CockroachDBLock) Unlock() error { | ||
c := l.backend | ||
c.permitPool.Acquire() | ||
defer c.permitPool.Release() | ||
|
||
if l.renewTicker != nil { | ||
l.renewTicker.Stop() | ||
} | ||
|
||
_, err := c.haStatements["delete"].Exec(l.key) | ||
return err | ||
} | ||
|
||
// Value checks whether or not the lock is held by any instance of CockroachDBLock, | ||
// including this one, and returns the current value. | ||
func (l *CockroachDBLock) Value() (bool, string, error) { | ||
c := l.backend | ||
c.permitPool.Acquire() | ||
defer c.permitPool.Release() | ||
var result string | ||
err := c.haStatements["get"].QueryRow(l.key).Scan(&result) | ||
|
||
switch err { | ||
case nil: | ||
return true, result, nil | ||
case sql.ErrNoRows: | ||
return false, "", nil | ||
default: | ||
return false, "", err | ||
|
||
} | ||
} | ||
|
||
// tryToLock tries to create a new item in CockroachDB every `retryInterval`. | ||
// As long as the item cannot be created (because it already exists), it will | ||
// be retried. If the operation fails due to an error, it is sent to the errors | ||
// channel. When the lock could be acquired successfully, the success channel | ||
// is closed. | ||
func (l *CockroachDBLock) tryToLock(stop <-chan struct{}, success chan struct{}, errors chan error) { | ||
ticker := time.NewTicker(l.retryInterval) | ||
defer ticker.Stop() | ||
|
||
for { | ||
select { | ||
case <-stop: | ||
return | ||
case <-ticker.C: | ||
gotlock, err := l.writeItem() | ||
switch { | ||
case err != nil: | ||
// Send to the error channel and don't block if full. | ||
select { | ||
case errors <- err: | ||
default: | ||
} | ||
return | ||
case gotlock: | ||
close(success) | ||
return | ||
} | ||
} | ||
} | ||
} | ||
|
||
func (l *CockroachDBLock) periodicallyRenewLock(done chan struct{}) { | ||
for range l.renewTicker.C { | ||
gotlock, err := l.writeItem() | ||
if err != nil || !gotlock { | ||
close(done) | ||
l.renewTicker.Stop() | ||
return | ||
} | ||
} | ||
} | ||
|
||
// Attempts to put/update the CockroachDB item using condition expressions to | ||
// evaluate the TTL. Returns true if the lock was obtained, false if not. | ||
// If false error may be nil or non-nil: nil indicates simply that someone | ||
// else has the lock, whereas non-nil means that something unexpected happened. | ||
func (l *CockroachDBLock) writeItem() (bool, error) { | ||
c := l.backend | ||
c.permitPool.Acquire() | ||
defer c.permitPool.Release() | ||
|
||
sqlResult, err := c.haStatements["upsert"].Exec(l.identity, l.key, l.value, l.ttlSeconds) | ||
if err != nil { | ||
return false, err | ||
} | ||
if sqlResult == nil { | ||
return false, fmt.Errorf("empty SQL response received") | ||
} | ||
|
||
ar, err := sqlResult.RowsAffected() | ||
if err != nil { | ||
return false, err | ||
} | ||
return ar == 1, nil | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know little about CockroachDB, but I thought it was fully compatible with PG. You've been diligent about adapting the PG approach here, but I noticed this one discrepancy: instead of
NOW() + $4 * INTERVAL '1 seconds'
you have simplyNOW() + $4
. Why's that?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's happening here is that
$4
is being passed in as a string type, not an integer type. When we attempt to to$4 * INTERVAL '1 seconds'
, we get a conversion error:We would have the option of casting
$4
to an integer, and then multiplying by an interval:NOW() + $4::int * INTERVAL '1 seconds'
.However, $4 is already a value in seconds, and so casting to an int just to multiply by 1 second is unnecessary. It's more succinct to just add the string value directly to
NOW()
.