Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: correctly handle null blocks when detecting an expensive fork #7210

Merged
merged 4 commits into from
Aug 31, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 43 additions & 26 deletions chain/stmgr/call.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"

"github.com/filecoin-project/go-address"
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/crypto"
"github.com/ipfs/go-cid"
"go.opencensus.io/trace"
Expand All @@ -20,41 +21,49 @@ import (

var ErrExpensiveFork = errors.New("refusing explicit call due to state fork at epoch")

// Call applies the given message to the given tipset's parent state, at the epoch following the
// tipset's parent. In the presence of null blocks, the height at which the message is invoked may
// be less than the specified tipset.
//
// - If no tipset is specified, the first tipset without an expensive migration is used.
// - If executing a message at a given tipset would trigger an expensive migration, the call will
// fail with ErrExpensiveFork.
func (sm *StateManager) Call(ctx context.Context, msg *types.Message, ts *types.TipSet) (*api.InvocResult, error) {
ctx, span := trace.StartSpan(ctx, "statemanager.Call")
defer span.End()

var pheight abi.ChainEpoch = -1

// If no tipset is provided, try to find one without a fork.
if ts == nil {
ts = sm.cs.GetHeaviestTipSet()

// Search back till we find a height with no fork, or we reach the beginning.
for ts.Height() > 0 && sm.hasExpensiveFork(ctx, ts.Height()-1) {
var err error
ts, err = sm.cs.GetTipSetFromKey(ts.Parents())
for ts.Height() > 0 {
pts, err := sm.cs.GetTipSetFromKey(ts.Parents())
if err != nil {
return nil, xerrors.Errorf("failed to find a non-forking epoch: %w", err)
}
if !sm.hasExpensiveFork(pts.Height()) {
pheight = pts.Height()
break
}
ts = pts
}
} else {
pts, err := sm.cs.LoadTipSet(ts.Parents())
if err != nil {
return nil, xerrors.Errorf("failed to load parent tipset: %w", err)
}
pheight = pts.Height()
if sm.hasExpensiveFork(pheight) {
return nil, ErrExpensiveFork
}
}

bstate := ts.ParentState()
pts, err := sm.cs.LoadTipSet(ts.Parents())
if err != nil {
return nil, xerrors.Errorf("failed to load parent tipset: %w", err)
}
pheight := pts.Height()

// If we have to run an expensive migration, and we're not at genesis,
// return an error because the migration will take too long.
//
// We allow this at height 0 for at-genesis migrations (for testing).
if pheight > 0 && sm.hasExpensiveFork(ctx, pheight) {
return nil, ErrExpensiveFork
}

// Run the (not expensive) migration.
bstate, err = sm.handleStateForks(ctx, bstate, pheight, nil, ts)
bstate, err := sm.handleStateForks(ctx, bstate, pheight, nil, ts)
if err != nil {
return nil, fmt.Errorf("failed to handle fork: %w", err)
}
Expand Down Expand Up @@ -140,25 +149,33 @@ func (sm *StateManager) CallWithGas(ctx context.Context, msg *types.Message, pri
// run the fork logic in `sm.TipSetState`. We need the _current_
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a good reason why Call runs against the tipset's parent state while CallWithGas runs against the tipset's own state? As far as I understand, that difference is what requires the introduction of hasExpensiveForkBetween. Beyond removing extra work, it would also be nice to standardize these two similar calls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CallWithGas applies the message on-top-of the tipset to provide better gas estimation, I think. By executing it on-top-of the last tipset, instead of on-top-of the parent tipset state, we get the best gas estimates.

I'd prefer to always use the parent state, but I'm not sure what might break. I'm especially concerned that, e.g., it might (rarely) cause issues with window post:

  1. See we're at height X.
  2. Try to push the proof message.
  3. Fail because the window isn't open yet.

// height to have no fork, because we'll run it inside this
// function before executing the given message.
for ts.Height() > 0 && (sm.hasExpensiveFork(ctx, ts.Height()) || sm.hasExpensiveFork(ctx, ts.Height()-1)) {
var err error
ts, err = sm.cs.GetTipSetFromKey(ts.Parents())
for ts.Height() > 0 {
pts, err := sm.cs.GetTipSetFromKey(ts.Parents())
if err != nil {
return nil, xerrors.Errorf("failed to find a non-forking epoch: %w", err)
}
}
}
if !sm.hasExpensiveForkBetween(pts.Height(), ts.Height()+1) {
break
}

// When we're not at the genesis block, make sure we don't have an expensive migration.
if ts.Height() > 0 && (sm.hasExpensiveFork(ctx, ts.Height()) || sm.hasExpensiveFork(ctx, ts.Height()-1)) {
return nil, ErrExpensiveFork
ts = pts
}
} else if ts.Height() > 0 {
pts, err := sm.cs.GetTipSetFromKey(ts.Parents())
if err != nil {
return nil, xerrors.Errorf("failed to find a non-forking epoch: %w", err)
}
if sm.hasExpensiveForkBetween(pts.Height(), ts.Height()+1) {
return nil, ErrExpensiveFork
}
}

state, _, err := sm.TipSetState(ctx, ts)
if err != nil {
return nil, xerrors.Errorf("computing tipset state: %w", err)
}

// Technically, the tipset we're passing in here should be ts+1, but that may not exist.
state, err = sm.handleStateForks(ctx, state, ts.Height(), nil, ts)
if err != nil {
return nil, fmt.Errorf("failed to handle fork: %w", err)
Expand Down
1 change: 0 additions & 1 deletion chain/stmgr/execute.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ func (sm *StateManager) ApplyBlocks(ctx context.Context, parentEpoch abi.ChainEp
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: not at all your bug, but reading the above code, it looks like line 82's error message is wrong and confusing, and it would make sense to fix it now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not touch that for now. I don't think that error is consensus critical, but I'd rather not touch it.


// handle state forks
// XXX: The state tree
newState, err := sm.handleStateForks(ctx, pstate, i, em, ts)
if err != nil {
return cid.Undef, cid.Undef, xerrors.Errorf("error handling state forks: %w", err)
Expand Down
23 changes: 20 additions & 3 deletions chain/stmgr/forks.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ type MigrationCache interface {
// - The oldState is the state produced by the upgrade epoch.
// - The returned newState is the new state that will be used by the next epoch.
// - The height is the upgrade epoch height (already executed).
// - The tipset is the tipset for the last non-null block before the upgrade. Do
// not assume that ts.Height() is the upgrade height.
// - The tipset is the first non-null tipset after the upgrade height (the tipset in
// which the upgrade is executed). Do not assume that ts.Height() is the upgrade height.
//
// NOTE: In StateCompute and CallWithGas, the passed tipset is actually the tipset _before_ the
// upgrade. The tipset should really only be used for referencing the "current chain".
type MigrationFunc func(
ctx context.Context,
sm *StateManager, cache MigrationCache,
Expand Down Expand Up @@ -208,7 +211,21 @@ func (sm *StateManager) handleStateForks(ctx context.Context, root cid.Cid, heig
return retCid, nil
}

func (sm *StateManager) hasExpensiveFork(ctx context.Context, height abi.ChainEpoch) bool {
// Returns true if executing the current tipset would trigger an expensive fork.
//
// - If the tipset is the genesis, this function always returns false.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As written now, this invariant depends on the sm.expensiveUpgrades table, which in reasonable scenarios could contain "expensive" upgrades at epoch 0 if, say, we did all migrations at the genesis epoch.

It might be better to override the table to always return false on 0 to maintain this invariant. Any network doing this would probably be doing very little work in the expensive migration since nothing has happened in state yet.

Also, all existing callers seem to implicitly assume that this is the case anyway by only checking heights > 0, so why not make it official?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the function is called hasExpensiveFork, not hasExpensiveForkOrIsGenesis 😄. Basically, I don't want to surprise users.

At the moment, genesis matters to all callers because there's really no other option. But I'd like to keep that as an explicit decision.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I've fixed the issue in Call where I wasn't handling this consistently.

// - The range checked is half-open: every epoch h with parent <= h < height is looked up in
//   sm.expensiveUpgrades, so an upgrade registered exactly at height is not reported.
// - An empty range (parent >= height) yields false.
func (sm *StateManager) hasExpensiveForkBetween(parent, height abi.ChainEpoch) bool {
	epoch := parent
	for epoch < height {
		// Only the presence of the key matters; the stored value is ignored.
		_, expensive := sm.expensiveUpgrades[epoch]
		if expensive {
			return true
		}
		epoch++
	}
	return false
}

// hasExpensiveFork reports whether sm.expensiveUpgrades has an entry for exactly the given
// epoch. Neighbouring epochs are not consulted.
func (sm *StateManager) hasExpensiveFork(height abi.ChainEpoch) bool {
	if _, registered := sm.expensiveUpgrades[height]; registered {
		return true
	}
	return false
}
Expand Down
40 changes: 37 additions & 3 deletions chain/stmgr/forks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,20 +242,35 @@ func TestForkHeightTriggers(t *testing.T) {
// TestForkRefuseCall runs testForkRefuseCall as a subtest for every combination of 0, 1 or 2
// null rounds before and after the fork height, nine combinations in total.
func TestForkRefuseCall(t *testing.T) {
	logging.SetAllLoggers(logging.LevelInfo)

	const nullChoices = 3
	for after := 0; after < nullChoices; after++ {
		for before := 0; before < nullChoices; before++ {
			// Copy the loop variables so the closure below captures per-iteration values.
			nullsBefore, nullsAfter := before, after
			name := fmt.Sprintf("after:%d,before:%d", nullsAfter, nullsBefore)
			t.Run(name, func(t *testing.T) {
				testForkRefuseCall(t, nullsBefore, nullsAfter)
			})
		}
	}
}
func testForkRefuseCall(t *testing.T, nullsBefore, nullsAfter int) {
ctx := context.TODO()

cg, err := gen.NewGenerator()
if err != nil {
t.Fatal(err)
}

var migrationCount int
sm, err := NewStateManagerWithUpgradeSchedule(
cg.ChainStore(), cg.StateManager().VMSys(), UpgradeSchedule{{
Network: network.Version1,
Expensive: true,
Height: testForkHeight,
Migration: func(ctx context.Context, sm *StateManager, cache MigrationCache, cb ExecMonitor,
root cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
migrationCount++
return root, nil
}}})
if err != nil {
Expand Down Expand Up @@ -292,14 +307,20 @@ func TestForkRefuseCall(t *testing.T) {
GasFeeCap: types.NewInt(0),
}

nullStart := abi.ChainEpoch(testForkHeight - nullsBefore)
nullLength := abi.ChainEpoch(nullsBefore + nullsAfter)

for i := 0; i < 50; i++ {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not your bug: I don't love that testForkHeight is a global and that this test relies on knowing that its value is 40 in order to actually test anything. Maybe make testForkHeight not a global, or change 50 to something like 2 * testForkHeight?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, being a global is intentional. But yeah, 50 being hard-coded is a problem.

ts, err := cg.NextTipSet()
pts := cg.CurTipset.TipSet()
skip := abi.ChainEpoch(0)
if pts.Height() == nullStart {
skip = nullLength
}
ts, err := cg.NextTipSetFromMiners(pts, cg.Miners, skip)
if err != nil {
t.Fatal(err)
}

pts, err := cg.ChainStore().LoadTipSet(ts.TipSet.TipSet().Parents())
require.NoError(t, err)
parentHeight := pts.Height()
currentHeight := ts.TipSet.TipSet().Height()

Expand All @@ -321,7 +342,20 @@ func TestForkRefuseCall(t *testing.T) {
require.NoError(t, err)
require.True(t, ret.MsgRct.ExitCode.IsSuccess())
}

// Calls without a tipset should walk back to the last non-fork tipset.
// We _verify_ that the migration wasn't run multiple times at the end of the
// test.
ret, err = sm.CallWithGas(ctx, m, nil, nil)
require.NoError(t, err)
require.True(t, ret.MsgRct.ExitCode.IsSuccess())

ret, err = sm.Call(ctx, m, nil)
require.NoError(t, err)
require.True(t, ret.MsgRct.ExitCode.IsSuccess())
}
// Make sure we didn't execute the migration multiple times.
require.Equal(t, migrationCount, 1)
}

func TestForkPreMigration(t *testing.T) {
Expand Down
5 changes: 3 additions & 2 deletions chain/stmgr/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,14 @@ func ComputeState(ctx context.Context, sm *StateManager, height abi.ChainEpoch,
}

for i := ts.Height(); i < height; i++ {
// handle state forks
// Technically, the tipset we're passing in here should be ts+1, but that may not exist.
base, err = sm.handleStateForks(ctx, base, i, &InvocationTracer{trace: &trace}, ts)
if err != nil {
return cid.Undef, nil, xerrors.Errorf("error handling state forks: %w", err)
}

// TODO: should we also run cron here?
// We intentionally don't run cron here, as we may be trying to look into the
// future. It's not guaranteed to be accurate... but that's fine.
}

r := store.NewChainRand(sm.cs, ts.Cids())
Expand Down