Skip to content

Commit

Permalink
Merge pull request btcsuite#69 from kcalvinalvin/fix-pruning-bugs
Browse files Browse the repository at this point in the history
database, main, blockchain: improve pruning code
  • Loading branch information
kcalvinalvin authored Aug 30, 2023
2 parents d50d809 + c59a0b3 commit 0849b7d
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 79 deletions.
4 changes: 2 additions & 2 deletions blockchain/chain.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ type BlockChain struct {

// pruneTarget is the size in bytes the database targets for when the node
// is pruned.
pruneTarget uint32
pruneTarget uint64

// These fields are related to the memory block index. They both have
// their own locks, however they are often also protected by the chain
Expand Down Expand Up @@ -2353,7 +2353,7 @@ type Config struct {

// Prune specifies the target database usage (in bytes) for the block and
// spend journal files. A Prune value of 0 specifies that no blocks will be deleted.
Prune uint32
Prune uint64
}

// New returns a BlockChain instance using the provided configuration details.
Expand Down
2 changes: 1 addition & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ type config struct {
UtxoCacheMaxSizeMiB uint `long:"utxocachemaxsize" description:"The maximum size in MiB of the UTXO cache"`
Utreexo bool `long:"utreexo" description:"Use utreexo compact state during block validation"`
NoWinService bool `long:"nowinservice" description:"Do not start as a background service on Windows -- NOTE: This flag only works on the command line, not in the config file"`
Prune uint32 `long:"prune" description:"Prune already validated blocks from the database. Must specify a target size in MiB (minimum value of 550)"`
Prune uint64 `long:"prune" description:"Prune already validated blocks from the database. Must specify a target size in MiB (minimum value of 550, default of 0 to disable pruning)"`

// Profiling options.
Profile string `long:"profile" description:"Enable HTTP profiling on given port -- NOTE port must be between 1024 and 65536"`
Expand Down
105 changes: 42 additions & 63 deletions database/ffldb/blockio.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -347,34 +348,12 @@ func (s *blockStore) fileSize(fileNum uint32) (uint32, error) {

// fileStartEndNum returns the first and the last file number in the database.
func (s *blockStore) fileStartEndNum() (uint32, uint32, error) {
// We use the spendJournalFileExtension as there's a guarantee that there
// will be a same number of block files as there are spend journal files.
//
// The only error we can get is a bad pattern error. Since we're hardcoding
// the pattern, we should not have an error at runtime.
files, _ := filepath.Glob(filepath.Join(s.basePath, "*"+spendJournalFileExtension))

lastFile, firstFile := -1, -1
for _, file := range files {
// Extract the file number. We can use just the spend journal extension
// since there is one spend journal file per block file.
fileNum := file
fileNum = strings.TrimPrefix(fileNum, s.basePath+"/")
fileNum = strings.TrimSuffix(fileNum, spendJournalFileExtension)

// Turn the string into a number.
var err error
lastFile, err = strconv.Atoi(fileNum)
if err != nil {
return 0, 0, err
}

if firstFile == -1 {
firstFile = lastFile
}
first, last, _, err := scanBlockFiles(s.basePath, s.filePathFunc)
if err != nil {
return 0, 0, err
}

return uint32(firstFile), uint32(lastFile), nil
return uint32(first), uint32(last), nil
}

// blockFile attempts to return an existing file handle for the passed flat file
Expand Down Expand Up @@ -852,46 +831,46 @@ func (s *blockStore) calcBlockFilesSize() (uint32, uint32, uint32, error) {
}

// scanBlockFiles searches the database directory for all flat block files to
// find the end of the most recent file. This position is considered the
// current write cursor which is also stored in the metadata. Thus, it is used
// to detect unexpected shutdowns in the middle of writes so the block files
// can be reconciled.
// find the first file, last file, and the end of the most recent file. The
// position at the last file is considered the current write cursor which is
// also stored in the metadata. Thus, it is used to detect unexpected shutdowns
// in the middle of writes so the block files can be reconciled.
func scanBlockFiles(dbPath string,
filePathFunc func(dbPath string, fileNum uint32) string) (int, uint32, error) {

// The only error we can get is a bad pattern error. Since we're hardcoding
// the pattern, we should not have an error at runtime.
files, _ := filepath.Glob(filepath.Join(dbPath, "*"+spendJournalFileExtension))

var err error
lastFile := -1
fileLen := uint32(0)
for _, file := range files {
// Extract the file number. We can use just the spend journal extension
// since there is one spend journal file per block file.
fileNum := file
fileNum = strings.TrimPrefix(fileNum, dbPath+"/")
fileNum = strings.TrimSuffix(fileNum, spendJournalFileExtension)

// Turn the string into a number.
lastFile, err = strconv.Atoi(fileNum)
if err != nil {
return 0, 0, err
}
filePathFunc func(dbPath string, fileNum uint32) string) (int, int, uint32, error) {
firstFile, lastFile, lastFileLen, err := int(-1), int(-1), uint32(0), error(nil)

// Actually get the file info.
filePath := filePathFunc(dbPath, uint32(lastFile))
st, err := os.Stat(filePath)
if err != nil {
return 0, 0, err
}
files, err := filepath.Glob(filepath.Join(dbPath, "*"+spendJournalFileExtension))
if err != nil {
return 0, 0, 0, err
}
sort.Strings(files)

// Return early if there's no block files.
if len(files) == 0 {
return firstFile, lastFile, lastFileLen, nil
}

// Grab the first and last file's number.
firstFile, err = strconv.Atoi(strings.TrimSuffix(filepath.Base(files[0]), spendJournalFileExtension))
if err != nil {
return 0, 0, 0, fmt.Errorf("scanBlockFiles error: %v", err)
}
lastFile, err = strconv.Atoi(strings.TrimSuffix(filepath.Base(files[len(files)-1]), spendJournalFileExtension))
if err != nil {
return 0, 0, 0, fmt.Errorf("scanBlockFiles error: %v", err)
}

fileLen = uint32(st.Size())
// Get the last file's length.
filePath := filePathFunc(dbPath, uint32(lastFile))
st, err := os.Stat(filePath)
if err != nil {
return 0, 0, 0, err
}
lastFileLen = uint32(st.Size())

log.Tracef("Scan found latest block file #%d with length %d", lastFile, lastFileLen)

log.Tracef("Scan found latest block file #%d with length %d", lastFile,
fileLen)
return lastFile, fileLen, nil
return firstFile, lastFile, lastFileLen, err
}

// newBlockStore returns a new block store with the current block file number
Expand All @@ -900,7 +879,7 @@ func newBlockStore(basePath string, network wire.BitcoinNet) (*blockStore, error
// Look for the end of the latest block file to determine what the
// write cursor position is from the viewpoint of the block files on
// disk.
fileNum, fileOff, err := scanBlockFiles(basePath, blockFilePath)
_, fileNum, fileOff, err := scanBlockFiles(basePath, blockFilePath)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -938,7 +917,7 @@ func newSJStore(basePath string, network wire.BitcoinNet) (*blockStore, error) {
// Look for the end of the latest block file to determine what the
// write cursor position is from the viewpoint of the block files on
// disk.
fileNum, fileOff, err := scanBlockFiles(basePath, spendJournalFilePath)
_, fileNum, fileOff, err := scanBlockFiles(basePath, spendJournalFilePath)
if err != nil {
return nil, err
}
Expand Down
96 changes: 84 additions & 12 deletions database/ffldb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,10 @@ type transaction struct {
pendingBlocks map[chainhash.Hash]int
pendingBlockData []pendingData

// Files that need to be deleted on commit. These are the files that
// are marked as files to be deleted during pruning.
pendingDelFileNums []uint32

// Spend journals that need to be stored on commit. The pendingSpendJournal map is
// kept to allow quick lookups of pending spend journals by block hash.
pendingSpendJournals map[chainhash.Hash]int
Expand Down Expand Up @@ -1647,6 +1651,9 @@ func (tx *transaction) close() {
tx.pendingKeys = nil
tx.pendingRemove = nil

// Clear pending file deletions.
tx.pendingDelFileNums = nil

// Release the snapshot.
if tx.snapshot != nil {
tx.snapshot.Release()
Expand All @@ -1669,6 +1676,25 @@ func (tx *transaction) close() {
//
// This function MUST only be called when there is pending data to be written.
func (tx *transaction) writePendingAndCommit() error {
// Loop through all the pending file deletions and delete them.
// We do this first before doing any of the writes as we can't undo
// deletions of files.
for _, fileNum := range tx.pendingDelFileNums {
err := tx.db.blkStore.deleteFileFunc(fileNum)
if err != nil {
// Nothing we can do if we fail to delete blocks besides
// return an error.
return err
}

err = tx.db.sjStore.deleteFileFunc(fileNum)
if err != nil {
// Nothing we can do if we fail to delete blocks besides
// return an error.
return err
}
}

// Save the current block store write position for potential rollback.
// These variables are only updated here in this function and there can
// only be one write transaction active at a time, so it's safe to store
Expand Down Expand Up @@ -1868,12 +1894,27 @@ func (tx *transaction) FetchSpendJournal(hash *chainhash.Hash) ([]byte, error) {
return sjBytes, nil
}

// BeenPruned returns if the block storage has ever been pruned.
//
// This function is part of the database.Tx interface implementation.
func (tx *transaction) BeenPruned() (bool, error) {
	firstNum, lastNum, _, err := scanBlockFiles(
		tx.db.blkStore.basePath, tx.db.blkStore.filePathFunc)
	if err != nil {
		return false, err
	}

	// A pruned database is missing its first .fdb file (file #0).
	// Requiring first != last additionally rules out the degenerate
	// cases of zero files on disk (-1 == -1) or a single file.
	return firstNum != 0 && firstNum != lastNum, nil
}

// PruneBlocks prunes block and spend journal files and attempts to reach the target size.
// If there's a minimum block height that the caller must keep, specifying keep height
// will prevent the block file including that block from getting deleted.
// Because of this, sometimes PruneBlocks may not prune until it reaches the target size
// but will attempt to get close to it.
func (tx *transaction) PruneBlocks(targetSize uint32, keepHeight int32) (int32, error) {
func (tx *transaction) PruneBlocks(targetSize uint64, keepHeight int32) (int32, error) {
// Ensure transaction state is valid.
if err := tx.checkClosed(); err != nil {
return 0, err
Expand All @@ -1885,14 +1926,21 @@ func (tx *transaction) PruneBlocks(targetSize uint32, keepHeight int32) (int32,
return 0, makeDbErr(database.ErrTxNotWritable, str, nil)
}

maxSize := tx.db.blkStore.maxBlockFileSize
if targetSize < uint64(maxSize) {
return -1, fmt.Errorf("got target size of %d but it must be greater "+
"than %d, the max size of a single block file",
targetSize, maxSize)
}

// Grab the first and last file numbers and the size for both the block and
// spend journal files.
firstBlkFile, lastBlkFile, blkSize, _ := tx.db.blkStore.calcBlockFilesSize()
_, _, sjSize, _ := tx.db.sjStore.calcBlockFilesSize()

// If the total size of block files and spend journal files are under the target,
// return early and don't prune.
totalSize := blkSize + sjSize
totalSize := uint64(blkSize + sjSize)
if totalSize <= targetSize {
return -1, nil
}
Expand All @@ -1902,6 +1950,7 @@ func (tx *transaction) PruneBlocks(targetSize uint32, keepHeight int32) (int32,
targetSize/(1024*1024))

earliestHeight := int32(math.MaxInt32)
deletedFiles := make(map[uint32]struct{})
for i := firstBlkFile; i <= lastBlkFile; i++ {
blkSize, err := tx.db.blkStore.fileSizeFunc(i)
if err != nil {
Expand All @@ -1926,7 +1975,7 @@ func (tx *transaction) PruneBlocks(targetSize uint32, keepHeight int32) (int32,
break
}

// If the last height in this file is eqaul or greater than the
// If the last height in this file is equal or greater than the
// keep height, keep this file but keep looking for other files to
// delete.
if lastHeight >= keepHeight && keepHeight > 0 {
Expand All @@ -1938,18 +1987,33 @@ func (tx *transaction) PruneBlocks(targetSize uint32, keepHeight int32) (int32,
continue
}

deletedSize := blkSize + stSize
deletedSize := uint64(blkSize + stSize)
totalSize -= deletedSize

err = tx.db.blkStore.deleteFileFunc(i)
if err != nil {
log.Warnf("PruneBlocks: Failed to delete block file "+
"number %d: %v", i, err)
// Add the block file to be deleted to the list of files pending deletion to
// delete when the transaction is committed.
if tx.pendingDelFileNums == nil {
tx.pendingDelFileNums = make([]uint32, 0, 1)
}
err = tx.db.sjStore.deleteFileFunc(i)
if err != nil {
log.Warnf("PruneBlocks: Failed to delete spend journal file "+
"number %d: %v", i, err)
tx.pendingDelFileNums = append(tx.pendingDelFileNums, i)
deletedFiles[i] = struct{}{}

// Remove the block height info from the database since we're going to prune
// this file.
tx.removeHeightInfo(i)
}

// Delete the indexed block locations for the files that we've just deleted.
cursor := tx.blockIdxBucket.Cursor()
for ok := cursor.First(); ok; ok = cursor.Next() {
loc := deserializeBlockLoc(cursor.Value())

_, found := deletedFiles[loc.blockFileNum]
if found {
err := cursor.Delete()
if err != nil {
return 0, err
}
}
}

Expand Down Expand Up @@ -2038,6 +2102,14 @@ func (tx *transaction) putHeightInfo(fileNum uint32, firstHeight, lastHeight int
return tx.blockHeightBucket.Put(blockNumBytes[:], writeBytes[:])
}

// removeHeightInfo removes the index for the height info from the database.
func (tx *transaction) removeHeightInfo(fileNum uint32) error {
	// The key is the little-endian encoding of the file number, matching
	// the encoding used by putHeightInfo.
	key := make([]byte, 4)
	binary.LittleEndian.PutUint32(key, fileNum)
	return tx.blockHeightBucket.Delete(key)
}

// db represents a collection of namespaces which are persisted and implements
// the database.DB interface. All database access is performed through
// transactions which are obtained through the specific Namespace.
Expand Down
Loading

0 comments on commit 0849b7d

Please sign in to comment.