Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core,les: headerchain import in batches #21471

Merged
merged 9 commits into from
Dec 9, 2020
8 changes: 2 additions & 6 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -2438,12 +2438,8 @@ func (bc *BlockChain) InsertHeaderChain(chain []*types.Header, checkFreq int) (i

bc.wg.Add(1)
defer bc.wg.Done()

whFunc := func(header *types.Header) error {
_, err := bc.hc.WriteHeader(header)
return err
}
return bc.hc.InsertHeaderChain(chain, whFunc, start)
_, err := bc.hc.InsertHeaderChain(chain, start)
return 0, err
}

// CurrentHeader retrieves the current head header of the canonical chain. The
Expand Down
262 changes: 152 additions & 110 deletions core/headerchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,107 +129,163 @@ func (hc *HeaderChain) GetBlockNumber(hash common.Hash) *uint64 {
return number
}

// WriteHeader writes a header into the local chain, given that its parent is
// already known. If the total difficulty of the newly inserted header becomes
// greater than the current known TD, the canonical chain is re-routed.
type headerWriteResult struct {
status WriteStatus
ignored int
imported int
lastHash common.Hash
lastHeader *types.Header
}

// WriteHeaders writes a chain of headers into the local chain, given that the parents
// are already known. If the total difficulty of the newly inserted chain becomes
// greater than the current known TD, the canonical chain is reorged.
//
// Note: This method is not concurrent-safe with inserting blocks simultaneously
// into the chain, as side effects caused by reorganisations cannot be emulated
// without the real blocks. Hence, writing headers directly should only be done
// in two scenarios: pure-header mode of operation (light clients), or properly
// separated header/block phases (non-archive clients).
func (hc *HeaderChain) WriteHeader(header *types.Header) (status WriteStatus, err error) {
// Cache some values to prevent constant recalculation
func (hc *HeaderChain) writeHeaders(headers []*types.Header) (result *headerWriteResult, err error) {
if len(headers) == 0 {
return &headerWriteResult{}, nil
}
ptd := hc.GetTd(headers[0].ParentHash, headers[0].Number.Uint64()-1)
if ptd == nil {
return &headerWriteResult{}, consensus.ErrUnknownAncestor
}
var (
hash = header.Hash()
number = header.Number.Uint64()
lastNumber = headers[0].Number.Uint64() - 1 // Last successfully imported number
lastHash = headers[0].ParentHash // Last imported header hash
newTD = new(big.Int).Set(ptd) // Total difficulty of inserted chain

lastHeader *types.Header
inserted []numberHash // Ephemeral lookup of number/hash for the chain
firstInserted = -1 // Index of the first non-ignored header
)
// Calculate the total difficulty of the header
ptd := hc.GetTd(header.ParentHash, number-1)
if ptd == nil {
return NonStatTy, consensus.ErrUnknownAncestor
}
head := hc.CurrentHeader().Number.Uint64()
localTd := hc.GetTd(hc.currentHeaderHash, head)
externTd := new(big.Int).Add(header.Difficulty, ptd)

// Irrelevant of the canonical status, write the td and header to the database
//
// Note all the components of header(td, hash->number index and header) should
// be written atomically.
headerBatch := hc.chainDb.NewBatch()
rawdb.WriteTd(headerBatch, hash, number, externTd)
rawdb.WriteHeader(headerBatch, header)
if err := headerBatch.Write(); err != nil {
log.Crit("Failed to write header into disk", "err", err)

batch := hc.chainDb.NewBatch()
for i, header := range headers {
hash, number := header.Hash(), header.Number.Uint64()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Random thought: since we've already done ValidateHeaders, we know that:

  • The headers are valid,
  • The headers are consecutive, so headers[i].Hash() is guaranteed to be headers[i+1].parentHash.

Which means that we only need to hash the very last header here, the rest we can pick from the child

newTD.Add(newTD, header.Difficulty)

// If the header is already known, skip it, otherwise store
if !hc.HasHeader(hash, number) {
// Irrelevant of the canonical status, write the TD and header to the database.
rawdb.WriteTd(batch, hash, number, newTD)
hc.tdCache.Add(hash, new(big.Int).Set(newTD))

rawdb.WriteHeader(batch, header)
inserted = append(inserted, numberHash{number, hash})
hc.headerCache.Add(hash, header)
hc.numberCache.Add(hash, number)
if firstInserted < 0 {
firstInserted = i
}
}
lastHeader, lastHash, lastNumber = header, hash, number
}

// Skip the slow disk write of all headers if interrupted.
if hc.procInterrupt() {
log.Debug("Premature abort during headers import")
return &headerWriteResult{}, errors.New("aborted")
}
// Commit to disk!
if err := batch.Write(); err != nil {
log.Crit("Failed to write headers", "error", err)
}
batch.Reset()

var (
head = hc.CurrentHeader().Number.Uint64()
localTD = hc.GetTd(hc.currentHeaderHash, head)
status = SideStatTy
)
// If the total difficulty is higher than our known, add it to the canonical chain
// Second clause in the if statement reduces the vulnerability to selfish mining.
// Please refer to http://www.cs.cornell.edu/~ie53/publications/btcProcFC.pdf
reorg := externTd.Cmp(localTd) > 0
if !reorg && externTd.Cmp(localTd) == 0 {
if header.Number.Uint64() < head {
reorg := newTD.Cmp(localTD) > 0
if !reorg && newTD.Cmp(localTD) == 0 {
if lastNumber < head {
reorg = true
} else if header.Number.Uint64() == head {
} else if lastNumber == head {
reorg = mrand.Float64() < 0.5
}
}
// If the parent of the (first) block is already the canon header,
// we don't have to go backwards to delete canon blocks, but
// simply pile them onto the existing chain
chainAlreadyCanon := headers[0].ParentHash == hc.currentHeaderHash
if reorg {
// If the header can be added into canonical chain, adjust the
// header chain markers(canonical indexes and head header flag).
//
// Note all markers should be written atomically.

// Delete any canonical number assignments above the new head
markerBatch := hc.chainDb.NewBatch()
for i := number + 1; ; i++ {
hash := rawdb.ReadCanonicalHash(hc.chainDb, i)
if hash == (common.Hash{}) {
break
markerBatch := batch // we can reuse the batch to keep allocs down
if !chainAlreadyCanon {
// Delete any canonical number assignments above the new head
for i := lastNumber + 1; ; i++ {
hash := rawdb.ReadCanonicalHash(hc.chainDb, i)
if hash == (common.Hash{}) {
break
}
rawdb.DeleteCanonicalHash(markerBatch, i)
}
// Overwrite any stale canonical number assignments, going
// backwards from the first header in this import
var (
headHash = headers[0].ParentHash // inserted[0].parent?
headNumber = headers[0].Number.Uint64() - 1 // inserted[0].num-1 ?
headHeader = hc.GetHeader(headHash, headNumber)
)
for rawdb.ReadCanonicalHash(hc.chainDb, headNumber) != headHash {
rawdb.WriteCanonicalHash(markerBatch, headHash, headNumber)
headHash = headHeader.ParentHash
headNumber = headHeader.Number.Uint64() - 1
headHeader = hc.GetHeader(headHash, headNumber)
}
// If some of the older headers were already known, but obtained canon-status
// during this import batch, then we need to write that now
// Further down, we continue writing the staus for the ones that
// were not already known
for i := 0; i < firstInserted; i++ {
hash := headers[i].Hash()
num := headers[i].Number.Uint64()
rawdb.WriteCanonicalHash(markerBatch, hash, num)
rawdb.WriteHeadHeaderHash(markerBatch, hash)
}
rawdb.DeleteCanonicalHash(markerBatch, i)
}

// Overwrite any stale canonical number assignments
var (
headHash = header.ParentHash
headNumber = header.Number.Uint64() - 1
headHeader = hc.GetHeader(headHash, headNumber)
)
for rawdb.ReadCanonicalHash(hc.chainDb, headNumber) != headHash {
rawdb.WriteCanonicalHash(markerBatch, headHash, headNumber)

headHash = headHeader.ParentHash
headNumber = headHeader.Number.Uint64() - 1
headHeader = hc.GetHeader(headHash, headNumber)
// Extend the canonical chain with the new headers
for _, hn := range inserted {
rawdb.WriteCanonicalHash(markerBatch, hn.hash, hn.number)
rawdb.WriteHeadHeaderHash(markerBatch, hn.hash)
}
// Extend the canonical chain with the new header
rawdb.WriteCanonicalHash(markerBatch, hash, number)
rawdb.WriteHeadHeaderHash(markerBatch, hash)
if err := markerBatch.Write(); err != nil {
log.Crit("Failed to write header markers into disk", "err", err)
}
markerBatch.Reset()
// Last step update all in-memory head header markers
hc.currentHeaderHash = hash
hc.currentHeader.Store(types.CopyHeader(header))
headHeaderGauge.Update(header.Number.Int64())
hc.currentHeaderHash = lastHash
hc.currentHeader.Store(types.CopyHeader(lastHeader))
headHeaderGauge.Update(lastHeader.Number.Int64())

// Chain status is canonical since this insert was a reorg.
// Note that all inserts which have higher TD than existing are 'reorg'.
status = CanonStatTy
} else {
status = SideStatTy
}
hc.tdCache.Add(hash, externTd)
hc.headerCache.Add(hash, header)
hc.numberCache.Add(hash, number)
return
}

// WhCallback is a callback function for inserting individual headers.
// A callback is used for two reasons: first, in a LightChain, status should be
// processed and light chain events sent, while in a BlockChain this is not
// necessary since chain events are sent after inserting blocks. Second, the
// header writes should be protected by the parent chain mutex individually.
type WhCallback func(*types.Header) error
if len(inserted) == 0 {
status = NonStatTy
}
return &headerWriteResult{
status: status,
ignored: len(headers) - len(inserted),
imported: len(inserted),
lastHash: lastHash,
lastHeader: lastHeader,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we can drop the lasthash. lastHeader contains everything. Besides, the hash should already be cached, I guess no reason to keep it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

types.Header does not cache the hash, only types.Block does

}, nil
}

func (hc *HeaderChain) ValidateHeaderChain(chain []*types.Header, checkFreq int) (int, error) {
// Do a sanity check that the provided chain is actually ordered and linked
Expand Down Expand Up @@ -282,55 +338,41 @@ func (hc *HeaderChain) ValidateHeaderChain(chain []*types.Header, checkFreq int)
return 0, nil
}

// InsertHeaderChain attempts to insert the given header chain in to the local
// chain, possibly creating a reorg. If an error is returned, it will return the
// index number of the failing header as well an error describing what went wrong.
// InsertHeaderChain inserts the given headers.
//
// The verify parameter can be used to fine tune whether nonce verification
// should be done or not. The reason behind the optional check is because some
// of the header retrieval mechanisms already need to verfy nonces, as well as
// because nonces can be verified sparsely, not needing to check each.
func (hc *HeaderChain) InsertHeaderChain(chain []*types.Header, writeHeader WhCallback, start time.Time) (int, error) {
// Collect some import statistics to report on
stats := struct{ processed, ignored int }{}
// All headers passed verification, import them into the database
for i, header := range chain {
// Short circuit insertion if shutting down
if hc.procInterrupt() {
log.Debug("Premature abort during headers import")
return i, errors.New("aborted")
}
// If the header's already known, skip it, otherwise store
hash := header.Hash()
if hc.HasHeader(hash, header.Number.Uint64()) {
externTd := hc.GetTd(hash, header.Number.Uint64())
localTd := hc.GetTd(hc.currentHeaderHash, hc.CurrentHeader().Number.Uint64())
if externTd == nil || externTd.Cmp(localTd) <= 0 {
stats.ignored++
continue
}
}
if err := writeHeader(header); err != nil {
return i, err
}
stats.processed++
// The validity of the headers is NOT CHECKED by this method, i.e. they need to be
// validated by ValidateHeaderChain before calling InsertHeaderChain.
//
// This insert is all-or-nothing. If this returns an error, no headers were written,
// otherwise they were all processed successfully.
//
// The returned 'write status' says if the inserted headers are part of the canonical chain
// or a side chain.
func (hc *HeaderChain) InsertHeaderChain(chain []*types.Header, start time.Time) (WriteStatus, error) {
if hc.procInterrupt() {
return 0, errors.New("aborted")
}
// Report some public statistics so the user has a clue what's going on
last := chain[len(chain)-1]
res, err := hc.writeHeaders(chain)

// Report some public statistics so the user has a clue what's going on
context := []interface{}{
"count", stats.processed, "elapsed", common.PrettyDuration(time.Since(start)),
"number", last.Number, "hash", last.Hash(),
"count", res.imported,
"elapsed", common.PrettyDuration(time.Since(start)),
}
if timestamp := time.Unix(int64(last.Time), 0); time.Since(timestamp) > time.Minute {
context = append(context, []interface{}{"age", common.PrettyAge(timestamp)}...)
if err != nil {
context = append(context, "err", err)
}
if last := res.lastHeader; last != nil {
context = append(context, "number", last.Number, "hash", res.lastHash)
if timestamp := time.Unix(int64(last.Time), 0); time.Since(timestamp) > time.Minute {
context = append(context, []interface{}{"age", common.PrettyAge(timestamp)}...)
}
}
if stats.ignored > 0 {
context = append(context, []interface{}{"ignored", stats.ignored}...)
if res.ignored > 0 {
context = append(context, []interface{}{"ignored", res.ignored}...)
}
log.Info("Imported new block headers", context...)

return 0, nil
return res.status, err
}

// GetBlockHashesFromHash retrieves a number of block hashes starting at a given
Expand Down
Loading