-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: data race related to last commit info; flush pruning heights as soon as changed #154
Changes from 3 commits
a173239
6a0efe2
4ed4275
3138f08
0906e86
c5d2ad1
451ead5
e18789b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ import ( | |
|
||
type Manager struct { | ||
logger log.Logger | ||
db dbm.DB | ||
opts *types.PruningOptions | ||
snapshotInterval uint64 | ||
pruneHeights []int64 | ||
|
@@ -26,9 +27,10 @@ const ( | |
pruneSnapshotHeightsKey = "s/pruneSnheights" | ||
) | ||
|
||
func NewManager(logger log.Logger) *Manager { | ||
func NewManager(logger log.Logger, db dbm.DB) *Manager { | ||
return &Manager{ | ||
logger: logger, | ||
db: db, | ||
opts: types.NewPruningOptions(types.PruningNothing), | ||
pruneHeights: []int64{}, | ||
// These are the heights that are multiples of snapshotInterval and kept for state sync snapshots. | ||
|
@@ -82,6 +84,9 @@ func (m *Manager) HandleHeight(previousHeight int64) int64 { | |
next = e.Next() | ||
} | ||
} | ||
|
||
// Must be unlocked since we are under mutex | ||
m.flushAllPruningHeightsUnlocked() | ||
}() | ||
|
||
if int64(m.opts.KeepRecent) < previousHeight { | ||
|
@@ -119,15 +124,6 @@ func (m *Manager) ShouldPruneAtHeight(height int64) bool { | |
return m.opts.GetPruningStrategy() != types.PruningNothing && m.opts.Interval > 0 && height%int64(m.opts.Interval) == 0 | ||
} | ||
|
||
// FlushPruningHeights flushes the pruning heights to the database for crash recovery. | ||
func (m *Manager) FlushPruningHeights(batch dbm.Batch) { | ||
if m.opts.GetPruningStrategy() == types.PruningNothing { | ||
return | ||
} | ||
m.flushPruningHeights(batch) | ||
m.flushPruningSnapshotHeights(batch) | ||
} | ||
|
||
// LoadPruningHeights loads the pruning heights from the database as a crash recovery. | ||
func (m *Manager) LoadPruningHeights(db dbm.DB) error { | ||
if m.opts.GetPruningStrategy() == types.PruningNothing { | ||
|
@@ -192,6 +188,40 @@ func (m *Manager) loadPruningSnapshotHeights(db dbm.DB) error { | |
return nil | ||
} | ||
|
||
// FlushAllPruningHeights flushes the pruning heights to the database for crash recovery. | ||
// "All" refers to regular pruning heights and snapshot heights, if any. | ||
func (m *Manager) FlushAllPruningHeights() { | ||
if m.opts.GetPruningStrategy() == types.PruningNothing { | ||
return | ||
} | ||
batch := m.db.NewBatch() | ||
defer batch.Close() | ||
m.flushPruningHeights(batch) | ||
m.flushPruningSnapshotHeights(batch) | ||
|
||
if err := batch.Write(); err != nil { | ||
panic(fmt.Errorf("error on batch write %w", err)) | ||
} | ||
} | ||
|
||
// flushAllPruningHeightsUnlocked flushes the pruning heights to the database for crash recovery. | ||
// "All" refers to regular pruning heights and snapshot heights, if any. | ||
// It serves the same function as exported FlushPruningHeights. However, it assummes that | ||
// mutex was acquired prior to calling this method. | ||
func (m *Manager) flushAllPruningHeightsUnlocked() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is needed because we already flush under lock in |
||
if m.opts.GetPruningStrategy() == types.PruningNothing { | ||
return | ||
} | ||
batch := m.db.NewBatch() | ||
defer batch.Close() | ||
m.flushPruningHeights(batch) | ||
m.flushPruningSnapshotHeightsUnlocked(batch) | ||
|
||
if err := batch.Write(); err != nil { | ||
panic(fmt.Errorf("error on batch write %w", err)) | ||
} | ||
} | ||
|
||
func (m *Manager) flushPruningHeights(batch dbm.Batch) { | ||
bz := make([]byte, 0) | ||
for _, ph := range m.pruneHeights { | ||
|
@@ -200,17 +230,25 @@ func (m *Manager) flushPruningHeights(batch dbm.Batch) { | |
bz = append(bz, buf...) | ||
} | ||
|
||
batch.Set([]byte(pruneHeightsKey), bz) | ||
if err := batch.Set([]byte(pruneHeightsKey), bz); err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
func (m *Manager) flushPruningSnapshotHeights(batch dbm.Batch) { | ||
m.mx.Lock() | ||
defer m.mx.Unlock() | ||
m.flushPruningSnapshotHeightsUnlocked(batch) | ||
} | ||
|
||
func (m *Manager) flushPruningSnapshotHeightsUnlocked(batch dbm.Batch) { | ||
bz := make([]byte, 0) | ||
for e := m.pruneSnapshotHeights.Front(); e != nil; e = e.Next() { | ||
buf := make([]byte, 8) | ||
binary.BigEndian.PutUint64(buf, uint64(e.Value.(int64))) | ||
bz = append(bz, buf...) | ||
} | ||
batch.Set([]byte(pruneSnapshotHeightsKey), bz) | ||
if err := batch.Set([]byte(pruneSnapshotHeightsKey), bz); err != nil { | ||
panic(err) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ import ( | |
"math" | ||
"sort" | ||
"strings" | ||
"sync" | ||
|
||
"github.com/cosmos/cosmos-sdk/pruning" | ||
pruningTypes "github.com/cosmos/cosmos-sdk/pruning/types" | ||
|
@@ -42,13 +43,21 @@ const ( | |
snapshotMaxItemSize = int(64e6) // SDK has no key/value size limit, so we set an arbitrary limit | ||
) | ||
|
||
type storeParams struct { | ||
key types.StoreKey | ||
db dbm.DB | ||
typ types.StoreType | ||
initialVersion uint64 | ||
} | ||
|
||
// Store is composed of many CommitStores. Name contrasts with | ||
// cacheMultiStore which is used for branching other MultiStores. It implements | ||
// the CommitMultiStore interface. | ||
type Store struct { | ||
db dbm.DB | ||
logger log.Logger | ||
lastCommitInfo *types.CommitInfo | ||
mx *sync.RWMutex // mutex to sync access to lastCommitInfo | ||
pruningManager *pruning.Manager | ||
iavlCacheSize int | ||
storesParams map[types.StoreKey]storeParams | ||
|
@@ -83,7 +92,8 @@ func NewStore(db dbm.DB, logger log.Logger) *Store { | |
stores: make(map[types.StoreKey]types.CommitKVStore), | ||
keysByName: make(map[string]types.StoreKey), | ||
listeners: make(map[types.StoreKey][]types.WriteListener), | ||
pruningManager: pruning.NewManager(logger), | ||
pruningManager: pruning.NewManager(logger, db), | ||
mx: &sync.RWMutex{}, | ||
} | ||
} | ||
|
||
|
@@ -194,7 +204,7 @@ func (rs *Store) loadVersion(ver int64, upgrades *types.StoreUpgrades) error { | |
// load old data if we are not version 0 | ||
if ver != 0 { | ||
var err error | ||
cInfo, err = getCommitInfo(rs.db, ver) | ||
cInfo, err = rs.getCommitInfoFromDb(ver) | ||
if err != nil { | ||
return err | ||
} | ||
|
@@ -375,6 +385,8 @@ func (rs *Store) ListeningEnabled(key types.StoreKey) bool { | |
|
||
// LastCommitID implements Committer/CommitStore. | ||
func (rs *Store) LastCommitID() types.CommitID { | ||
rs.mx.RLock() | ||
defer rs.mx.RUnlock() | ||
if rs.lastCommitInfo == nil { | ||
return types.CommitID{ | ||
Version: getLatestVersion(rs.db), | ||
|
@@ -402,19 +414,17 @@ func (rs *Store) Commit() types.CommitID { | |
version = previousHeight + 1 | ||
} | ||
|
||
rs.lastCommitInfo = rs.commitStores(version, rs.stores) | ||
|
||
var pruneErr error | ||
defer func() { | ||
rs.flushMetadata(rs.db, version, rs.lastCommitInfo) | ||
if pruneErr != nil { | ||
panic(pruneErr) | ||
} | ||
}() | ||
newCommitInfo := rs.commitStores(version, rs.stores) | ||
rs.updateLatestCommitInfo(newCommitInfo, version) | ||
|
||
pruneErr = rs.handlePruning(version) | ||
err := rs.handlePruning(version) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
rs.mx.RLock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be redundant because we only write in the same goroutine above. However, there can be concurrent readers. What do reviewers think? |
||
hash, keys := rs.lastCommitInfo.Hash() | ||
defer rs.mx.RUnlock() | ||
rs.logger.Info("calculated commit hash", "height", version, "commit_hash", hash, "keys", keys) | ||
|
||
return types.CommitID{ | ||
|
@@ -536,7 +546,7 @@ func (rs *Store) handlePruning(version int64) error { | |
|
||
if err := store.(*iavl.Store).DeleteVersions(pruningHeights...); err != nil { | ||
if errCause := errors.Cause(err); errCause != nil && errCause != iavltree.ErrVersionDoesNotExist { | ||
return err | ||
panic(err) | ||
} | ||
} | ||
} | ||
|
@@ -594,18 +604,10 @@ func (rs *Store) Query(req abci.RequestQuery) abci.ResponseQuery { | |
return sdkerrors.QueryResult(sdkerrors.Wrap(sdkerrors.ErrInvalidRequest, "proof is unexpectedly empty; ensure height has not been pruned")) | ||
} | ||
|
||
// If the request's height is the latest height we've committed, then utilize | ||
// the store's lastCommitInfo as this commit info may not be flushed to disk. | ||
// Otherwise, we query for the commit info from disk. | ||
var commitInfo *types.CommitInfo | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We now flush to disk as soon as |
||
if res.Height == rs.lastCommitInfo.Version { | ||
commitInfo = rs.lastCommitInfo | ||
} else { | ||
commitInfo, err = getCommitInfo(rs.db, res.Height) | ||
if err != nil { | ||
return sdkerrors.QueryResult(err) | ||
} | ||
commitInfo, err := rs.getCommitInfoFromDb(res.Height) | ||
if err != nil { | ||
return sdkerrors.QueryResult(err) | ||
} | ||
|
||
// Restore origin path and append proof op. | ||
|
@@ -878,7 +880,8 @@ func (rs *Store) Restore( | |
importer.Close() | ||
} | ||
|
||
rs.flushMetadata(rs.db, int64(height), rs.buildCommitInfo(int64(height))) | ||
rs.flushLastCommitInfoAndLatestVersion(rs.buildCommitInfo(int64(height))) | ||
rs.pruningManager.FlushAllPruningHeights() | ||
return rs.LoadLatestVersion() | ||
} | ||
|
||
|
@@ -983,26 +986,42 @@ func (rs *Store) commitStores(version int64, storeMap map[types.StoreKey]types.C | |
} | ||
} | ||
|
||
func (rs *Store) flushMetadata(db dbm.DB, version int64, cInfo *types.CommitInfo) { | ||
rs.logger.Debug("flushing metadata", "height", version) | ||
batch := db.NewBatch() | ||
func (rs *Store) updateLatestCommitInfo(newCommitInfo *types.CommitInfo, version int64) { | ||
rs.mx.Lock() | ||
defer rs.mx.Unlock() | ||
rs.lastCommitInfo = newCommitInfo | ||
rs.flushLastCommitInfoAndLatestVersion(newCommitInfo) | ||
} | ||
|
||
func (rs *Store) flushLastCommitInfoAndLatestVersion(cInfo *types.CommitInfo) { | ||
batch := rs.db.NewBatch() | ||
defer batch.Close() | ||
|
||
flushCommitInfo(batch, version, cInfo) | ||
flushLatestVersion(batch, version) | ||
rs.pruningManager.FlushPruningHeights(batch) | ||
flushCommitInfo(batch, cInfo) | ||
flushLatestVersion(batch, cInfo.Version) | ||
|
||
if err := batch.Write(); err != nil { | ||
panic(fmt.Errorf("error on batch write %w", err)) | ||
} | ||
rs.logger.Debug("flushing metadata finished", "height", version) | ||
} | ||
|
||
type storeParams struct { | ||
key types.StoreKey | ||
db dbm.DB | ||
typ types.StoreType | ||
initialVersion uint64 | ||
// Gets commitInfo from disk. | ||
func (rs *Store) getCommitInfoFromDb(ver int64) (*types.CommitInfo, error) { | ||
cInfoKey := fmt.Sprintf(commitInfoKeyFmt, ver) | ||
|
||
bz, err := rs.db.Get([]byte(cInfoKey)) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to get commit info") | ||
} else if bz == nil { | ||
return nil, errors.New("no commit info found") | ||
} | ||
|
||
cInfo := &types.CommitInfo{} | ||
if err = cInfo.Unmarshal(bz); err != nil { | ||
return nil, errors.Wrap(err, "failed unmarshal commit info") | ||
} | ||
|
||
return cInfo, nil | ||
} | ||
|
||
func getLatestVersion(db dbm.DB) int64 { | ||
|
@@ -1022,33 +1041,16 @@ func getLatestVersion(db dbm.DB) int64 { | |
return latestVersion | ||
} | ||
|
||
// Gets commitInfo from disk. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved up |
||
func getCommitInfo(db dbm.DB, ver int64) (*types.CommitInfo, error) { | ||
cInfoKey := fmt.Sprintf(commitInfoKeyFmt, ver) | ||
|
||
bz, err := db.Get([]byte(cInfoKey)) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to get commit info") | ||
} else if bz == nil { | ||
return nil, errors.New("no commit info found") | ||
} | ||
|
||
cInfo := &types.CommitInfo{} | ||
if err = cInfo.Unmarshal(bz); err != nil { | ||
return nil, errors.Wrap(err, "failed unmarshal commit info") | ||
} | ||
|
||
return cInfo, nil | ||
} | ||
|
||
func flushCommitInfo(batch dbm.Batch, version int64, cInfo *types.CommitInfo) { | ||
func flushCommitInfo(batch dbm.Batch, cInfo *types.CommitInfo) { | ||
bz, err := cInfo.Marshal() | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
cInfoKey := fmt.Sprintf(commitInfoKeyFmt, version) | ||
batch.Set([]byte(cInfoKey), bz) | ||
cInfoKey := fmt.Sprintf(commitInfoKeyFmt, cInfo.Version) | ||
if err := batch.Set([]byte(cInfoKey), bz); err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
func flushLatestVersion(batch dbm.Batch, version int64) { | ||
|
@@ -1057,5 +1059,7 @@ func flushLatestVersion(batch dbm.Batch, version int64) { | |
panic(err) | ||
} | ||
|
||
batch.Set([]byte(latestVersionKey), bz) | ||
if err := batch.Set([]byte(latestVersionKey), bz); err != nil { | ||
panic(err) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved down and renamed to
FlushAllPruningHeights