Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Capability Issue on Restart, Backport to v0.42 #9835

Merged
merged 14 commits into from
Aug 4, 2021
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ Ref: https://keepachangelog.com/en/1.0.0/

## Unreleased

### Bug Fixes

* [\#9835](https://github.com/cosmos/cosmos-sdk/pull/9835) Moved capability initialization logic to BeginBlocker to fix nondeterminsim issue mentioned in [\#9800](https://github.com/cosmos/cosmos-sdk/issues/9800). Applications must now include the capability module in their BeginBlocker order before any module that uses capabilities gets run.

### API Breaking Changes

* [\#9835](https://github.com/cosmos/cosmos-sdk/pull/9835) The `InitializeAndSeal` API has not changed, however it no longer initializes the in-memory state. `InitMemStore` has been introduced to serve this function, which will be called either in `InitChain` or `BeginBlock` (whichever is first after app start). Nodes may run this version on a network running 0.42.x, however, they must update their app.go files to include the capability module in their begin blockers.

### Client Breaking Changes

* [\#9781](https://github.com/cosmos/cosmos-sdk/pull/9781) Improve`withdraw-all-rewards` UX when broadcast mode `async` or `async` is used.
Expand Down
4 changes: 2 additions & 2 deletions simapp/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ func NewSimApp(
evidencetypes.StoreKey, ibctransfertypes.StoreKey, capabilitytypes.StoreKey,
)
tkeys := sdk.NewTransientStoreKeys(paramstypes.TStoreKey)
memKeys := sdk.NewMemoryStoreKeys(capabilitytypes.MemStoreKey)
memKeys := sdk.NewMemoryStoreKeys(capabilitytypes.MemStoreKey, "testing")

app := &SimApp{
BaseApp: bApp,
Expand Down Expand Up @@ -350,7 +350,7 @@ func NewSimApp(
// CanWithdrawInvariant invariant.
// NOTE: staking module is required if HistoricalEntries param > 0
app.mm.SetOrderBeginBlockers(
upgradetypes.ModuleName, minttypes.ModuleName, distrtypes.ModuleName, slashingtypes.ModuleName,
upgradetypes.ModuleName, capabilitytypes.ModuleName, minttypes.ModuleName, distrtypes.ModuleName, slashingtypes.ModuleName,
evidencetypes.ModuleName, stakingtypes.ModuleName, ibchost.ModuleName,
)
app.mm.SetOrderEndBlockers(crisistypes.ModuleName, govtypes.ModuleName, stakingtypes.ModuleName)
Expand Down
97 changes: 97 additions & 0 deletions x/capability/capability_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package capability_test

import (
"testing"

"github.com/stretchr/testify/suite"
abci "github.com/tendermint/tendermint/abci/types"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"

"github.com/cosmos/cosmos-sdk/codec"
"github.com/cosmos/cosmos-sdk/simapp"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/types/module"
banktypes "github.com/cosmos/cosmos-sdk/x/bank/types"
"github.com/cosmos/cosmos-sdk/x/capability"
"github.com/cosmos/cosmos-sdk/x/capability/keeper"
"github.com/cosmos/cosmos-sdk/x/capability/types"
)

type CapabilityTestSuite struct {
suite.Suite

cdc codec.Marshaler
ctx sdk.Context
app *simapp.SimApp
keeper *keeper.Keeper
module module.AppModule
}

func (suite *CapabilityTestSuite) SetupTest() {
checkTx := false
app := simapp.Setup(checkTx)
cdc := app.AppCodec()

// create new keeper so we can define custom scoping before init and seal
keeper := keeper.NewKeeper(cdc, app.GetKey(types.StoreKey), app.GetMemKey(types.MemStoreKey))

suite.app = app
suite.ctx = app.BaseApp.NewContext(checkTx, tmproto.Header{Height: 1})
suite.keeper = keeper
suite.cdc = cdc
suite.module = capability.NewAppModule(cdc, *keeper)
}

// The following test case mocks a specific bug discovered in https://github.com/cosmos/cosmos-sdk/issues/9800
// and ensures that the current code successfully fixes the issue.
func (suite *CapabilityTestSuite) TestInitializeMemStore() {
AdityaSripal marked this conversation as resolved.
Show resolved Hide resolved
sk1 := suite.keeper.ScopeToModule(banktypes.ModuleName)

cap1, err := sk1.NewCapability(suite.ctx, "transfer")
suite.Require().NoError(err)
suite.Require().NotNil(cap1)

// mock statesync by creating new keeper that shares persistent state but loses in-memory map
newKeeper := keeper.NewKeeper(suite.cdc, suite.app.GetKey(types.StoreKey), suite.app.GetMemKey("testing"))
newSk1 := newKeeper.ScopeToModule(banktypes.ModuleName)

// Mock App startup
ctx := suite.app.BaseApp.NewUncachedContext(false, tmproto.Header{})
newKeeper.InitializeAndSeal(ctx)
suite.Require().False(newKeeper.IsInitialized(ctx), "memstore initialized flag set before BeginBlock")

// Mock app beginblock and ensure that no gas has been consumed and memstore is initialized
ctx = suite.app.BaseApp.NewContext(false, tmproto.Header{}).WithBlockGasMeter(sdk.NewGasMeter(50))
prevGas := ctx.BlockGasMeter().GasConsumed()
restartedModule := capability.NewAppModule(suite.cdc, *newKeeper)
restartedModule.BeginBlock(ctx, abci.RequestBeginBlock{})
suite.Require().True(newKeeper.IsInitialized(ctx), "memstore initialized flag not set")
gasUsed := ctx.BlockGasMeter().GasConsumed()

suite.Require().Equal(prevGas, gasUsed, "beginblocker consumed gas during execution")

// Mock the first transaction getting capability and subsequently failing
// by using a cached context and discarding all cached writes.
cacheCtx, _ := ctx.CacheContext()
_, ok := newSk1.GetCapability(cacheCtx, "transfer")
suite.Require().True(ok)

// Ensure that the second transaction can still receive capability even if first tx fails.
ctx = suite.app.BaseApp.NewContext(false, tmproto.Header{})

cap1, ok = newSk1.GetCapability(ctx, "transfer")
suite.Require().True(ok)

// Ensure the capabilities don't get reinitialized on next BeginBlock
// by testing to see if capability returns same pointer
// also check that initialized flag is still set
restartedModule.BeginBlock(ctx, abci.RequestBeginBlock{})
recap, ok := newSk1.GetCapability(ctx, "transfer")
suite.Require().True(ok)
suite.Require().Equal(cap1, recap, "capabilities got reinitialized after second BeginBlock")
suite.Require().True(newKeeper.IsInitialized(ctx), "memstore initialized flag not set")
}

func TestCapabilityTestSuite(t *testing.T) {
suite.Run(t, new(CapabilityTestSuite))
}
5 changes: 3 additions & 2 deletions x/capability/genesis.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ func InitGenesis(ctx sdk.Context, k keeper.Keeper, genState types.GenesisState)
panic(err)
}

// set owners for each index and initialize capability
// set owners for each index
for _, genOwner := range genState.Owners {
k.SetOwners(ctx, genOwner.Index, genOwner.IndexOwners)
k.InitializeCapability(ctx, genOwner.Index, genOwner.IndexOwners)
}
// initialize in-memory capabilities
k.InitMemStore(ctx)
}
Comment on lines +20 to 22
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be used if node is restarted from genesis (either a new chain or genesis restart)


// ExportGenesis returns the capability module's exported genesis.
Expand Down
59 changes: 59 additions & 0 deletions x/capability/genesis_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package capability_test

import (
"github.com/tendermint/tendermint/libs/log"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
dbm "github.com/tendermint/tm-db"

"github.com/cosmos/cosmos-sdk/simapp"
sdk "github.com/cosmos/cosmos-sdk/types"
banktypes "github.com/cosmos/cosmos-sdk/x/bank/types"
"github.com/cosmos/cosmos-sdk/x/capability"
"github.com/cosmos/cosmos-sdk/x/capability/keeper"
"github.com/cosmos/cosmos-sdk/x/capability/types"
stakingtypes "github.com/cosmos/cosmos-sdk/x/staking/types"
)

func (suite *CapabilityTestSuite) TestGenesis() {
sk1 := suite.keeper.ScopeToModule(banktypes.ModuleName)
sk2 := suite.keeper.ScopeToModule(stakingtypes.ModuleName)

cap1, err := sk1.NewCapability(suite.ctx, "transfer")
suite.Require().NoError(err)
suite.Require().NotNil(cap1)

err = sk2.ClaimCapability(suite.ctx, cap1, "transfer")
suite.Require().NoError(err)

cap2, err := sk2.NewCapability(suite.ctx, "ica")
suite.Require().NoError(err)
suite.Require().NotNil(cap2)

genState := capability.ExportGenesis(suite.ctx, *suite.keeper)

// create new app that does not share persistent or in-memory state
// and initialize app from exported genesis state above.
db := dbm.NewMemDB()
encCdc := simapp.MakeTestEncodingConfig()
newApp := simapp.NewSimApp(log.NewNopLogger(), db, nil, true, map[int64]bool{}, simapp.DefaultNodeHome, 5, encCdc, simapp.EmptyAppOptions{})

newKeeper := keeper.NewKeeper(suite.cdc, newApp.GetKey(types.StoreKey), newApp.GetMemKey(types.MemStoreKey))
newSk1 := newKeeper.ScopeToModule(banktypes.ModuleName)
newSk2 := newKeeper.ScopeToModule(stakingtypes.ModuleName)
deliverCtx, _ := newApp.BaseApp.NewUncachedContext(false, tmproto.Header{}).WithBlockGasMeter(sdk.NewInfiniteGasMeter()).CacheContext()

capability.InitGenesis(deliverCtx, *newKeeper, *genState)

// check that all previous capabilities exist in new app after InitGenesis
sk1Cap1, ok := newSk1.GetCapability(deliverCtx, "transfer")
suite.Require().True(ok, "could not get first capability after genesis on first ScopedKeeper")
suite.Require().Equal(*cap1, *sk1Cap1, "capability values not equal on first ScopedKeeper")

sk2Cap1, ok := newSk2.GetCapability(deliverCtx, "transfer")
suite.Require().True(ok, "could not get first capability after genesis on first ScopedKeeper")
suite.Require().Equal(*cap1, *sk2Cap1, "capability values not equal on first ScopedKeeper")

sk2Cap2, ok := newSk2.GetCapability(deliverCtx, "ica")
suite.Require().True(ok, "could not get second capability after genesis on second ScopedKeeper")
suite.Require().Equal(*cap2, *sk2Cap2, "capability values not equal on second ScopedKeeper")
}
74 changes: 35 additions & 39 deletions x/capability/keeper/keeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ import (
"github.com/cosmos/cosmos-sdk/x/capability/types"
)

// initialized is a global variable used by GetCapability to ensure that the memory store
// and capability map are correctly populated. A state-synced node may copy over all the persistent
// state and start running the application without having the in-memory state required for x/capability.
// Thus, we must initialized the memory stores on-the-fly during tx execution once the first GetCapability
// is called.
// This is a temporary fix and should be replaced by a more robust solution in the next breaking release.
var initialized = false

type (
// Keeper defines the capability module's keeper. It is responsible for provisioning,
// tracking, and authenticating capabilities at runtime. During application
Expand Down Expand Up @@ -97,38 +89,58 @@ func (k *Keeper) ScopeToModule(moduleName string) ScopedKeeper {
}
}

// InitializeAndSeal loads all capabilities from the persistent KVStore into the
// in-memory store and seals the keeper to prevent further modules from creating
// a scoped keeper. InitializeAndSeal must be called once after the application
// state is loaded.
// InitializeAndSeal seals the keeper to prevent further modules from creating
// a scoped keeper. It also panics if the memory store is not of storetype `StoreTypeMemory`.
func (k *Keeper) InitializeAndSeal(ctx sdk.Context) {
if k.sealed {
panic("cannot initialize and seal an already sealed capability keeper")
}

memStore := ctx.KVStore(k.memKey)
k.sealed = true
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so InitializeAndSeal no longer initializes any capabilities, it just seals the keeper and ensures the mem store is a mem store. Should the godoc be updated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct and yes, I break this API on the breaking-changes pr

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function doesn't do a full initialization - it doesn't call InitMemStore. Maybe let's make a note, that this function should be called after InitMemStore. So we should update the comments of this function and InitMemStore function. And in 0.43 we can maybe rework it to make it more clear - by renaming this function to k.Seal(ctx)

BTW: we don't need to check the store type - this should be done in InitMemStore.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that you are sealing in the InitMemStore. So we should update the comments of this function and InitMemStore. And in 0.43 we can maybe rework it to make it more clear.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this is a breaking change of the state machine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No this shouldn't be breaking the state machine. InitializeCapability only writes to the memstores and in-memory go map.

The memory store does not get committed along with the rest of the app state. It is in-memory local storage for each node. So changing things inside of it does not break the state machine.

https://github.com/cosmos/cosmos-sdk/blob/master/store/mem/store.go

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The memory store does not

I know. Breaking state machine doesn't necessary mean to break the written state. To illustrate this issue:

  1. Node A will run app with 0.42.8
  2. Node B will run same app (same app manager config) but using 0.42.9

Node A will have all capabilities initialized, Node B will not have.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No this PR just changes where/when the capabilities are initialized.

In 0.42.8, the capabilities are initialized in app start NewApp, and then reinitialized on the first transaction that calls GetCapability.

In 0.42.9, the capabilities are initialized at the first abci method after startup (either InitChain or BeginBlock).

In either case, the capabilities are initialized before they are requested in a transaction, which is what is necessary for keeping nodes in sync.

There is a bug in 0.42.7 and 0.42.8, so there are situations where its state becomes out of sync with a node from 0.42.6.

However, comparing a node on 0.42.6 to a node on 0.42.9. They may initialize the in-memory capabilities in different places; however both will be fully initialized by the time the capability module gets used and thus will authenticate and reject the same transactions and be perfectly in sync.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behavior of InitializeCapability is different - so an app will need to make more changes in the code than bumping a version. IMHO this is a breaking change - we signal to the app that bumping a version is not enough.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes just bumping the version will not be enough. You will also have to make changes to app.go. But it should be clear that you can run gaia with 0.42.6 and 0.42.9 on the same network and there will be no state divergence


// InitMemStore will initialize the memory store if it hasn't been initialized yet.
// The function is safe to be called multiple times.
// InitMemStore must be called every time the app starts before the keeper is used (so
// `BeginBlock` or `InitChain` - whichever is first). We need access to the store so we
// can't initialize it in a constructor.
func (k *Keeper) InitMemStore(ctx sdk.Context) {
robert-zaremba marked this conversation as resolved.
Show resolved Hide resolved
// create context with no block gas meter to ensure we do not consume gas during local initialization logic.
noGasCtx := ctx.WithBlockGasMeter(sdk.NewInfiniteGasMeter())

memStore := noGasCtx.KVStore(k.memKey)
memStoreType := memStore.GetStoreType()

if memStoreType != sdk.StoreTypeMemory {
panic(fmt.Sprintf("invalid memory store type; got %s, expected: %s", memStoreType, sdk.StoreTypeMemory))
}

prefixStore := prefix.NewStore(ctx.KVStore(k.storeKey), types.KeyPrefixIndexCapability)
iterator := sdk.KVStorePrefixIterator(prefixStore, nil)
// check if memory store has not been initialized yet by checking if initialized flag is nil.
if !k.IsInitialized(noGasCtx) {
prefixStore := prefix.NewStore(noGasCtx.KVStore(k.storeKey), types.KeyPrefixIndexCapability)
iterator := sdk.KVStorePrefixIterator(prefixStore, nil)

// initialize the in-memory store for all persisted capabilities
defer iterator.Close()
// initialize the in-memory store for all persisted capabilities
defer iterator.Close()

for ; iterator.Valid(); iterator.Next() {
index := types.IndexFromKey(iterator.Key())
for ; iterator.Valid(); iterator.Next() {
index := types.IndexFromKey(iterator.Key())

var capOwners types.CapabilityOwners
var capOwners types.CapabilityOwners

k.cdc.MustUnmarshalBinaryBare(iterator.Value(), &capOwners)
k.InitializeCapability(ctx, index, capOwners)
k.cdc.MustUnmarshalBinaryBare(iterator.Value(), &capOwners)
k.InitializeCapability(noGasCtx, index, capOwners)
}

// set the initialized flag so we don't rerun initialization logic
memStore.Set(types.KeyMemInitialized, []byte{1})
}
robert-zaremba marked this conversation as resolved.
Show resolved Hide resolved
}

k.sealed = true
// IsInitialized returns true if the keeper is properly initialized, and false otherwise
func (k *Keeper) IsInitialized(ctx sdk.Context) bool {
memStore := ctx.KVStore(k.memKey)
return memStore.Get(types.KeyMemInitialized) != nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we use a keeper variable to cache the state? Now we are doing an extra state hit all the time. Once the response is true, we can cache it in the Keeper private variable (eg k.initialized).

Copy link
Member Author

@AdityaSripal AdityaSripal Aug 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but note this is a read to an in-memory store, so it's not nearly as expensive as an I/O read. Maybe caching with private variable will be a bit faster so I can do this if desired

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, I made this automatic by looking at the code. I think it's "nice to have"

}

// InitializeIndex sets the index to one (or greater) in InitChain according
Expand Down Expand Up @@ -350,22 +362,6 @@ func (sk ScopedKeeper) ReleaseCapability(ctx sdk.Context, cap *types.Capability)
// by name. The module is not allowed to retrieve capabilities which it does not
// own.
func (sk ScopedKeeper) GetCapability(ctx sdk.Context, name string) (*types.Capability, bool) {
// Create a keeper that will set all in-memory mappings correctly into memstore and capmap if scoped keeper is not initialized yet.
// This ensures that the in-memory mappings are correctly filled in, in case this is a state-synced node.
// This is a temporary non-breaking fix, a future PR should store the reverse mapping in the persistent store and reconstruct forward mapping and capmap on the fly.
if !initialized {
// create context with infinite gas meter to avoid app state mismatch.
initCtx := ctx.WithGasMeter(sdk.NewInfiniteGasMeter())
k := Keeper{
cdc: sk.cdc,
storeKey: sk.storeKey,
memKey: sk.memKey,
capMap: sk.capMap,
}
k.InitializeAndSeal(initCtx)
initialized = true
}

if strings.TrimSpace(name) == "" {
return nil, false
}
Expand Down
3 changes: 3 additions & 0 deletions x/capability/keeper/keeper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/stretchr/testify/suite"
tmproto "github.com/tendermint/tendermint/proto/tendermint/types"

"github.com/cosmos/cosmos-sdk/codec"
"github.com/cosmos/cosmos-sdk/simapp"
sdk "github.com/cosmos/cosmos-sdk/types"
banktypes "github.com/cosmos/cosmos-sdk/x/bank/types"
Expand All @@ -18,6 +19,7 @@ import (
type KeeperTestSuite struct {
suite.Suite

cdc codec.Marshaler
ctx sdk.Context
app *simapp.SimApp
keeper *keeper.Keeper
Expand All @@ -34,6 +36,7 @@ func (suite *KeeperTestSuite) SetupTest() {
suite.app = app
suite.ctx = app.BaseApp.NewContext(checkTx, tmproto.Header{Height: 1})
suite.keeper = keeper
suite.cdc = cdc
}

func (suite *KeeperTestSuite) TestInitializeAndSeal() {
Expand Down
14 changes: 12 additions & 2 deletions x/capability/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"math/rand"
"time"

"github.com/gorilla/mux"
"github.com/grpc-ecosystem/grpc-gateway/runtime"
Expand All @@ -14,6 +15,7 @@ import (
"github.com/cosmos/cosmos-sdk/client"
"github.com/cosmos/cosmos-sdk/codec"
cdctypes "github.com/cosmos/cosmos-sdk/codec/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/types/module"
simtypes "github.com/cosmos/cosmos-sdk/types/simulation"
Expand Down Expand Up @@ -136,8 +138,16 @@ func (am AppModule) ExportGenesis(ctx sdk.Context, cdc codec.JSONMarshaler) json
return cdc.MustMarshalJSON(genState)
}

// BeginBlock executes all ABCI BeginBlock logic respective to the capability module.
func (am AppModule) BeginBlock(_ sdk.Context, _ abci.RequestBeginBlock) {}
// BeginBlocker will call InitMemStore to initialize the memory stores in the case
// that this is the first time the node is executing a block since restarting (wiping memory).
// In this case, the BeginBlocker method will reinitialize the memory stores locally, so that subsequent
// capability transactions will pass.
// Otherwise BeginBlocker performs a no-op.
func (am AppModule) BeginBlock(ctx sdk.Context, _ abci.RequestBeginBlock) {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)

am.keeper.InitMemStore(ctx)
}

// EndBlock executes all ABCI EndBlock logic respective to the capability module. It
// returns no validator updates.
Expand Down
3 changes: 3 additions & 0 deletions x/capability/types/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ var (
// KeyPrefixIndexCapability defines a key prefix that stores index to capability
// name mappings.
KeyPrefixIndexCapability = []byte("capability_index")

// KeyMemInitialized defines the key that stores the initialized flag in the memory store
KeyMemInitialized = []byte("mem_initialized")
)

// RevCapabilityKey returns a reverse lookup key for a given module and capability
Expand Down