Skip to content
This repository has been archived by the owner on May 21, 2024. It is now read-only.

Commit

Permalink
Write state diff to CSV (#2)
Browse files Browse the repository at this point in the history
* port statediff from https://github.com/jpmorganchase/quorum/blob/9b7fd9af8082795eeeb6863d9746f12b82dd5078/statediff/statediff.go; minor fixes

* integrating state diff extracting, building, and persisting into geth processes

* work towards persisting created statediffs in ipfs; based off github.com/vulcanize/eth-block-extractor

* Add a state diff service

* Remove diff extractor from blockchain

* Update imports

* Move statediff on/off check to geth cmd config

* Update starting state diff service

* Add debugging logs for creating diff

* Add statediff extractor and builder tests and small refactoring

* Start to write statediff to a CSV

* Restructure statediff directory

* Pull CSV publishing methods into their own file

* Reformatting due to go fmt

* Add gomega to vendor dir

* Remove testing focuses

* Update statediff tests to use golang test pkg

instead of ginkgo

- builder_test
- extractor_test
- publisher_test

* Use hexutil.Encode instead of deprecated common.ToHex

* Remove OldValue from DiffBigInt and DiffUint64 fields

* Update builder test

* Remove old storage value from updated accounts

* Remove old values from created/deleted accounts

* Update publisher to account for only storing current account values

* Update service loop and fetching previous block

* Update testing

- remove statediff ginkgo test suite file
- move mocks to their own dir

* Updates per go fmt

* Updates to tests

* Pass statediff mode and path in through cli

* Return filename from publisher

* Remove some duplication in builder

* Remove code field from state diff output

this is the contract byte code, and it can still be obtained by querying
the db by the codeHash

* Consolidate acct diff structs for updated & updated/deleted accts

* Include block number in csv filename

* Clean up error logging

* Cleanup formatting, spelling, etc

* Address PR comments

* Add contract address and storage value to csv

* Refactor accumulating account row in csv publisher

* Add DiffStorage struct

* Add storage key to csv

* Address PR comments

* Fix publisher to include rows for accounts that don't have store updates

* Update builder test after merging in release/1.8

* Update test contract to include storage on contract initialization

- so that we're able to test that storage diffing works for created and
deleted accounts (not just updated accounts).

* Factor out a common trie iterator method in builder
  • Loading branch information
elizabethengelman committed Dec 30, 2019
1 parent b9bac1f commit 00e0056
Show file tree
Hide file tree
Showing 89 changed files with 6,464 additions and 6 deletions.
4 changes: 4 additions & 0 deletions cmd/geth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ func makeFullNode(ctx *cli.Context) *node.Node {
if cfg.Ethstats.URL != "" {
utils.RegisterEthStatsService(stack, cfg.Ethstats.URL)
}

if ctx.GlobalBool(utils.StateDiffFlag.Name) {
utils.RegisterStateDiffService(stack, ctx)
}
return stack
}

Expand Down
3 changes: 3 additions & 0 deletions cmd/geth/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ var (
utils.GpoPercentileFlag,
utils.EWASMInterpreterFlag,
utils.EVMInterpreterFlag,
utils.StateDiffFlag,
utils.StateDiffModeFlag,
utils.StateDiffPathFlag,
configFileFlag,
}

Expand Down
8 changes: 8 additions & 0 deletions cmd/geth/usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,14 @@ var AppHelpFlagGroups = []flagGroup{
utils.MinerLegacyExtraDataFlag,
},
},
{
Name: "STATE DIFF",
Flags: []cli.Flag{
utils.StateDiffFlag,
utils.StateDiffModeFlag,
utils.StateDiffPathFlag,
},
},
{
Name: "MISC",
},
Expand Down
47 changes: 47 additions & 0 deletions cmd/utils/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ import (
whisper "github.com/ethereum/go-ethereum/whisper/whisperv6"
pcsclite "github.com/gballet/go-libpcsclite"
cli "gopkg.in/urfave/cli.v1"
"github.com/ethereum/go-ethereum/statediff/service"
"github.com/ethereum/go-ethereum/statediff"
)

var (
Expand Down Expand Up @@ -755,6 +757,23 @@ var (
Usage: "External EVM configuration (default = built-in interpreter)",
Value: "",
}

// StateDiffFlag switches the state diff service on; makeFullNode only
// registers the service when this flag is set.
StateDiffFlag = cli.BoolFlag{
	Name:  "statediff",
	Usage: "Enables the calculation of state diffs between each block and persists these state diffs using the configured persistence mode.",
}

// StateDiffModeFlag names the persistence mode; its value is parsed with
// statediff.NewMode when the service is registered.
StateDiffModeFlag = cli.StringFlag{
	Name:  "statediff.mode",
	Usage: "Enables the user to determine which persistence mode they'd like to store the state diffs in.",
	Value: "csv",
}

// StateDiffPathFlag is the location handed to the state diff service as
// Config.Path.
StateDiffPathFlag = cli.StringFlag{
	Name:  "statediff.path",
	Usage: "Enables the user to determine where to persist the state diffs.",
	Value: ".",
}
)

// MakeDataDir retrieves the currently requested data directory, terminating
Expand Down Expand Up @@ -1613,6 +1632,34 @@ func RegisterGraphQLService(stack *node.Node, endpoint string, cors, vhosts []st
}
}

// RegisterStateDiffService adds the state diff service to the node stack.
//
// The persistence mode and output path are taken from the statediff.mode and
// statediff.path CLI flags. An unparsable mode or a failed registration is
// reported through Fatalf.
func RegisterStateDiffService(stack *node.Node, ctx *cli.Context) {
	modeFlag := ctx.GlobalString(StateDiffModeFlag.Name)
	mode, err := statediff.NewMode(modeFlag)
	if err != nil {
		// Fatalf takes a format string; without the verb the error would be
		// rendered as a "%!(EXTRA ...)" suffix.
		Fatalf("Failed to register State Diff Service: %v", err)
	}

	config := statediff.Config{
		Mode: mode,
		Path: ctx.GlobalString(StateDiffPathFlag.Name),
	}

	// The constructor parameter is named svcCtx so it does not shadow the CLI
	// context of the enclosing function.
	if err := stack.Register(func(svcCtx *node.ServiceContext) (node.Service, error) {
		var ethServ *eth.Ethereum
		// Without this check a missing eth service would leave ethServ nil and
		// the calls below would panic.
		if err := svcCtx.Service(&ethServ); err != nil {
			return nil, err
		}
		return service.NewStateDiffService(ethServ.ChainDb(), ethServ.BlockChain(), config)
	}); err != nil {
		Fatalf("Failed to register State Diff Service: %v", err)
	}
}

func SetupMetrics(ctx *cli.Context) {
if metrics.Enabled {
log.Info("Enabling metrics collection")
Expand Down
272 changes: 272 additions & 0 deletions statediff/builder/builder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Contains the state diff builder, which compares the state tries at two state
// roots and reports the accounts (and their storage) created, deleted, or updated.

package builder

import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
)

// Builder computes the state diff between two state roots.
type Builder interface {
// BuildStateDiff compares the state at oldStateRoot with the state at
// newStateRoot and returns the created, deleted, and updated accounts,
// tagged with the supplied block number and block hash.
BuildStateDiff(oldStateRoot, newStateRoot common.Hash, blockNumber int64, blockHash common.Hash) (*StateDiff, error)
}

// builder is the Builder implementation in this package; it reads trie nodes
// and address preimages from a chain database.
type builder struct {
// chainDB is the raw key-value store; addressByPath reads "secure-key-"
// prefixed entries from it to map trie paths back to addresses.
chainDB ethdb.Database
// trieDB wraps chainDB and is used to open the state and storage tries.
trieDB *trie.Database
// NOTE(review): cachedTrie is not referenced by any code in this file —
// confirm whether it is still needed.
cachedTrie *trie.Trie
}

// NewBuilder returns a builder that reads from db, using a new trie database
// layered on top of db to open tries.
func NewBuilder(db ethdb.Database) *builder {
	b := new(builder)
	b.chainDB = db
	b.trieDB = trie.NewDatabase(db)
	return b
}

// BuildStateDiff opens the state tries at oldStateRoot and newStateRoot,
// collects the account leaves that differ in each direction, and classifies
// them as created, deleted, or updated accounts. The returned StateDiff is
// tagged with blockNumber and blockHash. Any failure is logged and returned.
func (sdb *builder) BuildStateDiff(oldStateRoot, newStateRoot common.Hash, blockNumber int64, blockHash common.Hash) (*StateDiff, error) {
	prevTrie, err := trie.New(oldStateRoot, sdb.trieDB)
	if err != nil {
		log.Error("Error creating trie for oldStateRoot", "error", err)
		return nil, err
	}
	nextTrie, err := trie.New(newStateRoot, sdb.trieDB)
	if err != nil {
		log.Error("Error creating trie for newStateRoot", "error", err)
		return nil, err
	}

	// Each difference walk consumes its iterators, so a fresh pair is needed
	// for every pass.
	freshIterators := func() (trie.NodeIterator, trie.NodeIterator) {
		return prevTrie.NodeIterator([]byte{}), nextTrie.NodeIterator([]byte{})
	}

	// Leaves that differ when walking from the old trie to the new one.
	prevIt, nextIt := freshIterators()
	added, err := sdb.collectDiffNodes(prevIt, nextIt)
	if err != nil {
		log.Error("Error collecting creation diff nodes", "error", err)
		return nil, err
	}

	// Same walk in the opposite direction.
	prevIt, nextIt = freshIterators()
	removed, err := sdb.collectDiffNodes(nextIt, prevIt)
	if err != nil {
		log.Error("Error collecting deletion diff nodes", "error", err)
		return nil, err
	}

	// An address that shows up in both sets was updated rather than created
	// or deleted.
	changedKeys := findIntersection(sortKeys(added), sortKeys(removed))

	// The incremental build must run first: it removes the updated addresses
	// from both maps, leaving only true creations and deletions behind.
	updated, err := sdb.buildDiffIncremental(added, removed, changedKeys)
	if err != nil {
		log.Error("Error building diff for updated accounts", "error", err)
		return nil, err
	}
	created, err := sdb.buildDiffEventual(added)
	if err != nil {
		log.Error("Error building diff for created accounts", "error", err)
		return nil, err
	}
	deleted, err := sdb.buildDiffEventual(removed)
	if err != nil {
		log.Error("Error building diff for deleted accounts", "error", err)
		return nil, err
	}

	diff := &StateDiff{
		BlockNumber:     blockNumber,
		BlockHash:       blockHash,
		CreatedAccounts: created,
		DeletedAccounts: deleted,
		UpdatedAccounts: updated,
	}
	return diff, nil
}

// collectDiffNodes walks the difference iterator built from (a, b) and, for
// every leaf it yields, resolves the leaf's address and decodes the leaf blob
// into an account.
//
// It returns the decoded accounts keyed by address. An error is returned if an
// address lookup fails, an account cannot be RLP-decoded, or the iteration
// itself stopped on an error.
func (sdb *builder) collectDiffNodes(a, b trie.NodeIterator) (map[common.Address]*state.Account, error) {
	diffAccounts := make(map[common.Address]*state.Account)
	it, _ := trie.NewDifferenceIterator(a, b)

	for {
		log.Debug("Current Path and Hash", "path", pathToStr(it), "hashold", it.Hash())
		if it.Leaf() {
			// lookup address; the final element of the leaf path is dropped
			// before the lookup (presumably the trie's terminator nibble —
			// TODO confirm).
			leafPath := it.Path()
			path := make([]byte, len(leafPath)-1)
			copy(path, leafPath)
			addr, err := sdb.addressByPath(path)
			if err != nil {
				log.Error("Error looking up address via path", "path", path, "error", err)
				return nil, err
			}

			// lookup account state
			var account state.Account
			if err := rlp.DecodeBytes(it.LeafBlob(), &account); err != nil {
				log.Error("Error looking up account via address", "address", addr, "error", err)
				return nil, err
			}

			// record account to diffs (creation if we are looking at new - old; deletion if old - new)
			log.Debug("Account lookup successful", "address", addr, "account", account)
			diffAccounts[*addr] = &account
		}
		if !it.Next(true) {
			break
		}
	}

	// Next also returns false when the walk aborts, so the iterator error has
	// to be checked; otherwise a failed walk would pass for a complete diff.
	if err := it.Error(); err != nil {
		return nil, err
	}

	return diffAccounts, nil
}

// buildDiffEventual converts each account in accounts into an AccountDiff that
// carries the account's nonce, balance, code hash, storage root, and the full
// contents of its storage trie. It fails on the first account whose storage
// cannot be read.
func (sdb *builder) buildDiffEventual(accounts map[common.Address]*state.Account) (map[common.Address]AccountDiff, error) {
	result := make(map[common.Address]AccountDiff)
	for address, acct := range accounts {
		storage, err := sdb.buildStorageDiffsEventual(acct.Root)
		if err != nil {
			log.Error("Failed building eventual storage diffs", "Address", address, "error", err)
			return nil, err
		}

		// rootHex is declared per iteration so every diff points at its own
		// string.
		rootHex := acct.Root.Hex()
		result[address] = AccountDiff{
			Nonce:        DiffUint64{Value: &acct.Nonce},
			Balance:      DiffBigInt{Value: acct.Balance},
			CodeHash:     hexutil.Encode(acct.CodeHash),
			ContractRoot: DiffString{Value: &rootHex},
			Storage:      storage,
		}
	}

	return result, nil
}

// buildDiffIncremental builds an AccountDiff for every address in updatedKeys,
// using the account from creations for the current values and the storage
// roots of both versions for the storage diff.
//
// Side effect: every processed address is deleted from both creations and
// deletions, so that afterwards those maps hold only accounts that are not in
// updatedKeys.
func (sdb *builder) buildDiffIncremental(creations map[common.Address]*state.Account, deletions map[common.Address]*state.Account, updatedKeys []string) (map[common.Address]AccountDiff, error) {
	updated := make(map[common.Address]AccountDiff)
	for _, key := range updatedKeys {
		address := common.HexToAddress(key)
		current := creations[address]
		previous := deletions[address]

		storage, err := sdb.buildStorageDiffsIncremental(previous.Root, current.Root)
		if err != nil {
			log.Error("Failed building storage diffs", "Address", key, "error", err)
			return nil, err
		}

		rootHex := current.Root.Hex()
		updated[address] = AccountDiff{
			Nonce:        DiffUint64{Value: &current.Nonce},
			Balance:      DiffBigInt{Value: current.Balance},
			CodeHash:     hexutil.Encode(current.CodeHash),
			ContractRoot: DiffString{Value: &rootHex},
			Storage:      storage,
		}

		delete(creations, address)
		delete(deletions, address)
	}
	return updated, nil
}

// buildStorageDiffsEventual opens the storage trie rooted at sr and returns
// every leaf in it as a DiffStorage entry.
//
// An error is returned if the trie cannot be opened or if iterating over it
// stopped on an error.
func (sdb *builder) buildStorageDiffsEventual(sr common.Hash) (map[string]DiffStorage, error) {
	log.Debug("Storage Root For Eventual Diff", "root", sr.Hex())
	sTrie, err := trie.New(sr, sdb.trieDB)
	if err != nil {
		// This is a failure, so it is logged at Error level like the other
		// failures in this file.
		log.Error("error in build storage diff eventual", "error", err)
		return nil, err
	}
	it := sTrie.NodeIterator(make([]byte, 0))
	storageDiffs := buildStorageDiffsFromTrie(it)
	// The walk ends both on exhaustion and on failure; only the iterator error
	// tells the two apart.
	if err := it.Error(); err != nil {
		return nil, err
	}
	return storageDiffs, nil
}

// buildStorageDiffsIncremental opens the storage tries rooted at oldSR and
// newSR and returns the leaves yielded by the difference iterator built from
// (old, new) as DiffStorage entries.
//
// An error is returned if either trie cannot be opened or if iterating over
// the difference stopped on an error.
func (sdb *builder) buildStorageDiffsIncremental(oldSR common.Hash, newSR common.Hash) (map[string]DiffStorage, error) {
	log.Debug("Storage Roots for Incremental Diff", "old", oldSR.Hex(), "new", newSR.Hex())
	oldTrie, err := trie.New(oldSR, sdb.trieDB)
	if err != nil {
		return nil, err
	}
	newTrie, err := trie.New(newSR, sdb.trieDB)
	if err != nil {
		return nil, err
	}

	oldIt := oldTrie.NodeIterator(make([]byte, 0))
	newIt := newTrie.NodeIterator(make([]byte, 0))
	it, _ := trie.NewDifferenceIterator(oldIt, newIt)
	storageDiffs := buildStorageDiffsFromTrie(it)
	// The walk ends both on exhaustion and on failure; only the iterator error
	// tells the two apart.
	if err := it.Error(); err != nil {
		return nil, err
	}

	return storageDiffs, nil
}

// buildStorageDiffsFromTrie drains it and returns one DiffStorage per leaf,
// keyed by the leaf's path string, holding the hex-encoded leaf key and leaf
// blob. The iterator's current position is inspected before the first call to
// Next.
func buildStorageDiffsFromTrie(it trie.NodeIterator) map[string]DiffStorage {
	diffs := make(map[string]DiffStorage)
	for more := true; more; more = it.Next(true) {
		log.Debug("Iterating over state at path ", "path", pathToStr(it))
		if !it.Leaf() {
			continue
		}

		log.Debug("Found leaf in storage", "path", pathToStr(it))
		nodePath := pathToStr(it)
		// key and value are declared inside the loop body so each entry
		// points at its own strings.
		key := hexutil.Encode(it.LeafKey())
		value := hexutil.Encode(it.LeafBlob())
		diffs[nodePath] = DiffStorage{
			Key:   &key,
			Value: &value,
		}
	}

	return diffs
}

// addressByPath resolves a trie path to an address by reading the chain
// database entry stored under "secure-key-" followed by hexToKeyBytes(path).
// The lookup error is logged and returned if the entry cannot be read.
func (sdb *builder) addressByPath(path []byte) (*common.Address, error) {
	const secureKeyPrefix = "secure-key-"

	// The logged value is the prefix plus the raw path, not the converted key
	// used for the lookup.
	loggedPath := hexutil.Encode(append([]byte(secureKeyPrefix), path...))
	log.Debug("Looking up address from path", "path", loggedPath)

	addrBytes, err := sdb.chainDB.Get(append([]byte(secureKeyPrefix), hexToKeyBytes(path)...))
	if err != nil {
		log.Error("Error looking up address via path", "path", loggedPath, "error", err)
		return nil, err
	}

	addr := common.BytesToAddress(addrBytes)
	log.Debug("Address found", "Address", addr)
	return &addr, nil
}
Loading

0 comments on commit 00e0056

Please sign in to comment.