fsync and do not generate empty files in snapshots #1345

Merged
24 changes: 15 additions & 9 deletions common/ledger/blkstorage/blockindex.go
@@ -9,7 +9,7 @@ package blkstorage
import (
"bytes"
"fmt"
- "path"
+ "path/filepath"
"unicode/utf8"

"github.com/golang/protobuf/proto"
@@ -260,13 +260,6 @@ func (index *blockIndex) exportUniqueTxIDs(dir string, newHashFunc snapshot.NewH
return nil, ErrAttrNotIndexed
}

- // create the data file
- dataFile, err := snapshot.CreateFile(path.Join(dir, snapshotDataFileName), snapshotFileFormat, newHashFunc)
- if err != nil {
-     return nil, err
- }
- defer dataFile.Close()

dbItr := index.db.GetIterator([]byte{txIDIdxKeyPrefix}, []byte{txIDIdxKeyPrefix + 1})
defer dbItr.Release()
if err := dbItr.Error(); err != nil {
@@ -275,6 +268,8 @@

var previousTxID string
var numTxIDs uint64 = 0
+ var dataFile *snapshot.FileWriter
+ var err error
for dbItr.Next() {
if err := dbItr.Error(); err != nil {
return nil, errors.Wrap(err, "internal leveldb error while iterating for txids")
@@ -288,19 +283,30 @@
continue
}
previousTxID = txID
+ if numTxIDs == 0 { // first iteration, create the data file
+     dataFile, err = snapshot.CreateFile(filepath.Join(dir, snapshotDataFileName), snapshotFileFormat, newHashFunc)
Comment on lines +286 to +287

Contributor:
Is it possible to have no txID in the store? Even the genesis block has a txID, right? 4eff624

Contributor (author):
As far as possible, we should not bake assumptions about how the consumer uses the store into the lower-level code. At this level of the code, anyone can open a store and invoke the Export function.

Contributor:
I don't see a useful use case that would open an empty block store with no genesis block and then try to export txIDs. The same is true for the StateDB. IMO, this disturbs the existing code flow unnecessarily. I understand your point, but I am not convinced.

IMO, empty files are okay, as they convey that the store is empty. Since we have a difference of opinion, I leave this here.

+     if err != nil {
+         return nil, err
+     }
+     defer dataFile.Close()
+ }
if err := dataFile.EncodeString(txID); err != nil {
return nil, err
}
numTxIDs++
}

+ if dataFile == nil {
+     return nil, nil
+ }

dataHash, err := dataFile.Done()
if err != nil {
return nil, err
}

// create the metadata file
- metadataFile, err := snapshot.CreateFile(path.Join(dir, snapshotMetadataFileName), snapshotFileFormat, newHashFunc)
+ metadataFile, err := snapshot.CreateFile(filepath.Join(dir, snapshotMetadataFileName), snapshotFileFormat, newHashFunc)
if err != nil {
return nil, err
}
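The pattern introduced above, creating the data file only when the first txID is encountered and returning a nil map when the store stays empty, can be illustrated with a minimal self-contained sketch. The writeRecords helper and the file name below are illustrative assumptions, not Fabric APIs:

package main

import (
    "bufio"
    "fmt"
    "os"
    "path/filepath"
)

// writeRecords creates its output file lazily: if records is empty,
// no file is created and ("", nil) is returned, mirroring the
// "do not generate empty files" behavior of this PR.
func writeRecords(dir string, records []string) (string, error) {
    var f *os.File
    var w *bufio.Writer
    for i, rec := range records {
        if i == 0 { // first record: create the file now
            var err error
            f, err = os.Create(filepath.Join(dir, "data.snapshot"))
            if err != nil {
                return "", err
            }
            defer f.Close()
            w = bufio.NewWriter(f)
        }
        if _, err := fmt.Fprintln(w, rec); err != nil {
            return "", err
        }
    }
    if f == nil { // nothing was written, so nothing exists on disk
        return "", nil
    }
    if err := w.Flush(); err != nil {
        return "", err
    }
    return f.Name(), nil
}

func main() {
    dir, err := os.MkdirTemp("", "lazy")
    if err != nil {
        panic(err)
    }
    defer os.RemoveAll(dir)

    name, err := writeRecords(dir, nil) // empty input: no file on disk
    fmt.Printf("empty: %q %v\n", name, err)

    name, err = writeRecords(dir, []string{"txid-1", "txid-2"})
    fmt.Printf("non-empty: %q %v\n", name, err)
}

The nil check before Flush plays the same role as the if dataFile == nil guard in the diff: callers can distinguish an empty store from an exported one without touching the filesystem.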
33 changes: 20 additions & 13 deletions common/ledger/blkstorage/blockindex_test.go
@@ -12,7 +12,7 @@ import (
"hash"
"io/ioutil"
"os"
"path"
"path/filepath"
"testing"

"github.com/hyperledger/fabric-protos-go/common"
@@ -270,20 +270,27 @@ func TestExportUniqueTxIDs(t *testing.T) {
defer blkfileMgrWrapper.close()
blkfileMgr := blkfileMgrWrapper.blockfileMgr

- bg, gb := testutil.NewBlockGenerator(t, "myChannel", false)
- blkfileMgr.addBlock(gb)

testSnapshotDir := testPath()
defer os.RemoveAll(testSnapshotDir)

+ // empty store generates no output
+ fileHashes, err := blkfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
+ require.NoError(t, err)
+ require.Empty(t, fileHashes)
+ files, err := ioutil.ReadDir(testSnapshotDir)
+ require.NoError(t, err)
+ require.Len(t, files, 0)

+ // add genesis block and test the exported bytes
+ bg, gb := testutil.NewBlockGenerator(t, "myChannel", false)
+ blkfileMgr.addBlock(gb)
configTxID, err := protoutil.GetOrComputeTxIDFromEnvelope(gb.Data.Data[0])
require.NoError(t, err)
- fileHashes, err := blkfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
+ fileHashes, err = blkfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
require.NoError(t, err)
verifyExportedTxIDs(t, testSnapshotDir, fileHashes, configTxID)
- os.Remove(path.Join(testSnapshotDir, snapshotDataFileName))
- os.Remove(path.Join(testSnapshotDir, snapshotMetadataFileName))
+ os.Remove(filepath.Join(testSnapshotDir, snapshotDataFileName))
+ os.Remove(filepath.Join(testSnapshotDir, snapshotMetadataFileName))

// add block-1 and test the exported bytes
block1 := bg.NextBlockWithTxid(
@@ -300,8 +307,8 @@
fileHashes, err = blkfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
require.NoError(t, err)
verifyExportedTxIDs(t, testSnapshotDir, fileHashes, "txid-1", "txid-2", "txid-3", configTxID) //"txid-1" appears once, Txids appear in radix sort order
- os.Remove(path.Join(testSnapshotDir, snapshotDataFileName))
- os.Remove(path.Join(testSnapshotDir, snapshotMetadataFileName))
+ os.Remove(filepath.Join(testSnapshotDir, snapshotDataFileName))
+ os.Remove(filepath.Join(testSnapshotDir, snapshotMetadataFileName))

// add block-2 and test the exported bytes
block2 := bg.NextBlockWithTxid(
@@ -351,7 +358,7 @@ func TestExportUniqueTxIDsErrorCases(t *testing.T) {
defer os.RemoveAll(testSnapshotDir)

// error during data file creation
dataFilePath := path.Join(testSnapshotDir, snapshotDataFileName)
dataFilePath := filepath.Join(testSnapshotDir, snapshotDataFileName)
_, err := os.Create(dataFilePath)
require.NoError(t, err)
_, err = blkfileMgrWrapper.blockfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
@@ -361,7 +368,7 @@
// error during metadata file creation
fmt.Printf("testSnapshotDir=%s", testSnapshotDir)
require.NoError(t, os.MkdirAll(testSnapshotDir, 0700))
metadataFilePath := path.Join(testSnapshotDir, snapshotMetadataFileName)
metadataFilePath := filepath.Join(testSnapshotDir, snapshotMetadataFileName)
_, err = os.Create(metadataFilePath)
require.NoError(t, err)
_, err = blkfileMgrWrapper.blockfileMgr.index.exportUniqueTxIDs(testSnapshotDir, testNewHashFunc)
@@ -388,13 +395,13 @@ func verifyExportedTxIDs(t *testing.T, dir string, fileHashes map[string][]byte,
require.Contains(t, fileHashes, snapshotDataFileName)
require.Contains(t, fileHashes, snapshotMetadataFileName)

- dataFile := path.Join(dir, snapshotDataFileName)
+ dataFile := filepath.Join(dir, snapshotDataFileName)
dataFileContent, err := ioutil.ReadFile(dataFile)
require.NoError(t, err)
dataFileHash := sha256.Sum256(dataFileContent)
require.Equal(t, dataFileHash[:], fileHashes[snapshotDataFileName])

- metadataFile := path.Join(dir, snapshotMetadataFileName)
+ metadataFile := filepath.Join(dir, snapshotMetadataFileName)
metadataFileContent, err := ioutil.ReadFile(metadataFile)
require.NoError(t, err)
metadataFileHash := sha256.Sum256(metadataFileContent)
3 changes: 3 additions & 0 deletions common/ledger/snapshot/file.go
@@ -98,6 +98,9 @@ func (c *FileWriter) Done() ([]byte, error) {
if err := c.bufWriter.Flush(); err != nil {
return nil, errors.Wrapf(err, "error while flushing to the snapshot file: %s ", c.file.Name())
}
+ if err := c.file.Sync(); err != nil {
+     return nil, err
+ }
if err := c.file.Close(); err != nil {
return nil, errors.Wrapf(err, "error while closing the snapshot file: %s ", c.file.Name())
}
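For context on the one-line Sync addition: bufWriter.Flush only moves buffered bytes from the process into the OS page cache, and Close does not force them to stable storage, so a crash shortly after Close could still lose the file's contents. A minimal sketch of the flush, fsync, close ordering (the file name and payload are made up for illustration):

package main

import (
    "bufio"
    "log"
    "os"
)

func writeDurably(path string, data []byte) error {
    f, err := os.Create(path)
    if err != nil {
        return err
    }
    w := bufio.NewWriter(f)
    if _, err := w.Write(data); err != nil {
        f.Close()
        return err
    }
    // Order matters: Flush drains the userspace buffer into the kernel,
    // Sync (fsync) forces the kernel to write it to stable storage, and
    // only then is Close safe. A crash after Close but before an fsync
    // could otherwise lose data the caller believes was written.
    if err := w.Flush(); err != nil {
        f.Close()
        return err
    }
    if err := f.Sync(); err != nil {
        f.Close()
        return err
    }
    return f.Close()
}

func main() {
    if err := writeDurably("example.snapshot", []byte("payload")); err != nil {
        log.Fatal(err)
    }
}

A Sync failure is returned before Close is attempted, so the caller never treats an unsynced snapshot file as durable.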
26 changes: 17 additions & 9 deletions core/ledger/confighistory/mgr.go
@@ -8,7 +8,7 @@ package confighistory

import (
"fmt"
"path"
"path/filepath"

"github.com/golang/protobuf/proto"
"github.com/hyperledger/fabric-protos-go/common"
@@ -181,23 +181,27 @@ func (r *Retriever) CollectionConfigAt(blockNum uint64, chaincodeName string) (*
// extra bytes. Further, the collection config namespace is not expected to have
// millions of entries.
func (r *Retriever) ExportConfigHistory(dir string, newHashFunc snapshot.NewHashFunc) (map[string][]byte, error) {
- dataFileWriter, err := snapshot.CreateFile(path.Join(dir, snapshotDataFileName), snapshotFileFormat, newHashFunc)
- if err != nil {
-     return nil, err
- }
- defer dataFileWriter.Close()

nsItr := r.dbHandle.getNamespaceIterator(collectionConfigNamespace)
if err := nsItr.Error(); err != nil {
return nil, errors.Wrap(err, "internal leveldb error while obtaining db iterator")

}
defer nsItr.Release()

var numCollectionConfigs uint64 = 0
+ var dataFileWriter *snapshot.FileWriter
+ var err error
for nsItr.Next() {
if err := nsItr.Error(); err != nil {
return nil, errors.Wrap(err, "internal leveldb error while iterating for collection config history")
}
+ if numCollectionConfigs == 0 { // first iteration, create the data file
+     dataFileWriter, err = snapshot.CreateFile(filepath.Join(dir, snapshotDataFileName), snapshotFileFormat, newHashFunc)
Comment on lines +198 to +199

Contributor:
Clarification question:

  • When the snapshot files are processed by the new peer, how would the new peer know that all required files are present in a given directory? In other words, an admin can forget to copy a required file. I assume that the snapshot.metadata file would contain a mapping between the file names (all non-empty) and their hashes, along with other data such as height and hash. The kvledger at the new peer would then ensure that all files listed in snapshot.metadata are present before processing the snapshot. Am I correct?

Comment:

  1. I agree that the store would be empty when there is no explicit collection config. I am worried that an admin might report a file as missing when it is missing intentionally. I assume snapshot.metadata is not in a human-readable format for finding the list of files (a counter-argument is that the admin can still look at each file and complain about empty content). A consistent set of files for all scenarios looks natural to me and avoids confusion.
  2. If we allow empty files, the code path for both empty and non-empty files would be the same (this does not look like a hack to me when we allow empty files for code simplicity). Further, the kvledger would not need logic for which files do not exist and which components should not be called, and so on. Even if we pass this logic to the individual components, it adds unnecessary complexity. The earlier code looked neater to me.

Contributor (author):
> When the snapshot files are processed by the new peer, how would the new peer know that all required files are present in a given directory? ... Am I correct?

Yes, that is how it would work.

> I assume snapshot.metadata is not in a human-readable format for finding the list of files ... A consistent set of files for all scenarios looks natural to me and avoids confusion.

It would be human-readable JSON, so an admin can match the hashes manually. IMO, it is much clearer to have a set of files and match the names and hashes against the metadata. If a channel does not use private data and we keep exporting hash files and collection config files anyway, that may cause more confusion.

> If we allow empty files, the code path for both empty and non-empty files would be the same ...

When it comes to exporting data, we should not apply the same criteria as we do for fully in-memory data. The in-memory data is not seen by anyone. For exported data, I prefer the neatness of the exported artifacts dictating the code rather than the other way around.

Contributor (author):
More on human-readable JSON: this works until we add compression; we will have to think more before allowing compression. In any case, there will be a final hash that represents the snapshot, and that hash, along with basic information about the snapshot, would be present in a JSON file.

Contributor:
What I am saying is that when we have the same set of files all the time, it is easy for the consumer, i.e., the new peer processing these snapshot files. When the set of files can differ per snapshot, it adds unnecessary complication to the code on the consumer side.

Contributor (author):
As I said before, to me the neatness of the exported data should dictate the code, not the other way around. Also, I am not sure what code complication you are referring to. During import, if the data files are not present for a particular component, it simply returns. That is rather simple to reason about.

Contributor:
I see. I was assuming that the kvledger would ask each component for the required files and call ImportXXX(dir) only if they are present. Now I understand that the plan is to call ImportXXX(dir) and let the function return immediately if the respective store's file is not present.

+     if err != nil {
+         return nil, err
+     }
+     defer dataFileWriter.Close()
+ }
if err := dataFileWriter.EncodeBytes(nsItr.Key()); err != nil {
return nil, err
}
@@ -206,12 +210,16 @@
}
numCollectionConfigs++
}

+ if dataFileWriter == nil {
+     return nil, nil
+ }

dataHash, err := dataFileWriter.Done()
if err != nil {
return nil, err
}

- metadataFileWriter, err := snapshot.CreateFile(path.Join(dir, snapshotMetadataFileName), snapshotFileFormat, newHashFunc)
+ metadataFileWriter, err := snapshot.CreateFile(filepath.Join(dir, snapshotMetadataFileName), snapshotFileFormat, newHashFunc)
if err != nil {
return nil, err
}
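The thread above settles on the consumer calling ImportXXX(dir) and letting it return immediately when a component's file is absent. A sketch of what that early return might look like; the importConfigHistory name, the constant, and the decode placeholder are assumptions for illustration, not the actual Fabric import code:

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

const snapshotDataFileName = "confighistory.data" // hypothetical file name

// importConfigHistory returns immediately, without error, when the
// component's data file was not exported (i.e., the store was empty).
func importConfigHistory(dir string) error {
    dataFilePath := filepath.Join(dir, snapshotDataFileName)
    if _, err := os.Stat(dataFilePath); os.IsNotExist(err) {
        return nil // nothing to import for this component
    }
    f, err := os.Open(dataFilePath)
    if err != nil {
        return err
    }
    defer f.Close()
    // ... decode and apply the exported entries here ...
    return nil
}

func main() {
    fmt.Println(importConfigHistory(os.TempDir()))
}

With this shape, the kvledger can call every component's import unconditionally; components whose stores were empty at export time simply no-op.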
22 changes: 18 additions & 4 deletions core/ledger/confighistory/mgr_test.go
@@ -308,9 +308,10 @@ func TestExportConfigHistory(t *testing.T) {
// config history database is empty
fileHashes, err := env.retriever.ExportConfigHistory(env.testSnapshotDir, testNewHashFunc)
require.NoError(t, err)
- verifyExportedConfigHistory(t, env.testSnapshotDir, fileHashes, nil)
- os.Remove(path.Join(env.testSnapshotDir, snapshotDataFileName))
- os.Remove(path.Join(env.testSnapshotDir, snapshotMetadataFileName))
+ require.Empty(t, fileHashes)
+ files, err := ioutil.ReadDir(env.testSnapshotDir)
+ require.NoError(t, err)
+ require.Len(t, files, 0)

// config history database has 3 chaincodes each with 1 collection config entry in the
// collectionConfigNamespace
@@ -426,10 +427,23 @@ func verifyExportedConfigHistory(t *testing.T, dir string, fileHashes map[string
func TestExportConfigHistoryErrorCase(t *testing.T) {
env := newTestEnvForSnapshot(t)
defer env.cleanup()

+ dbHandle := env.mgr.dbProvider.getDB("ledger1")
+ cc1collConfigPackage := testutilCreateCollConfigPkg([]string{"Explicit-cc1-coll-1", "Explicit-cc1-coll-2"})
+ batch, err := prepareDBBatch(
+     map[string]*peer.CollectionConfigPackage{
+         "chaincode1": cc1collConfigPackage,
+     },
+     50,
+ )
+ assert.NoError(t, err)
+ assert.NoError(t, dbHandle.writeBatch(batch, true))

// error during data file creation
dataFilePath := path.Join(env.testSnapshotDir, snapshotDataFileName)
- _, err := os.Create(dataFilePath)
+ _, err = os.Create(dataFilePath)
require.NoError(t, err)

_, err = env.retriever.ExportConfigHistory(env.testSnapshotDir, testNewHashFunc)
require.Contains(t, err.Error(), "error while creating the snapshot file: "+dataFilePath)
os.RemoveAll(env.testSnapshotDir)
83 changes: 47 additions & 36 deletions core/ledger/kvledger/txmgmt/privacyenabledstate/snapshot.go
@@ -8,7 +8,7 @@ package privacyenabledstate

import (
"hash"
"path"
"path/filepath"

"github.com/hyperledger/fabric/common/ledger/snapshot"
"github.com/hyperledger/fabric/core/ledger/kvledger/txmgmt/statedb"
@@ -33,28 +33,8 @@ func (s *DB) ExportPubStateAndPvtStateHashes(dir string, newHashFunc snapshot.Ne
}
defer itr.Close()

- pubStateWriter, err := newSnapshotWriter(
-     path.Join(dir, pubStateDataFileName),
-     path.Join(dir, pubStateMetadataFileName),
-     dbValueFormat,
-     newHashFunc,
- )
- if err != nil {
-     return nil, err
- }
- defer pubStateWriter.close()

- pvtStateHashesWriter, err := newSnapshotWriter(
-     path.Join(dir, pvtStateHashesFileName),
-     path.Join(dir, pvtStateHashesMetadataFileName),
-     dbValueFormat,
-     newHashFunc,
- )
- if err != nil {
-     return nil, err
- }
- defer pvtStateHashesWriter.close()

+ var pubStateWriter *snapshotWriter
+ var pvtStateHashesWriter *snapshotWriter
for {
compositeKey, dbValue, err := itr.Next()
if err != nil {
@@ -65,30 +45,61 @@ func (s *DB) ExportPubStateAndPvtStateHashes(dir string, newHashFunc snapshot.Ne
}
switch {
case isHashedDataNs(compositeKey.Namespace):
+ if pvtStateHashesWriter == nil { // encountered first time the pvt state hash element
+     pvtStateHashesWriter, err = newSnapshotWriter(
+         filepath.Join(dir, pvtStateHashesFileName),
+         filepath.Join(dir, pvtStateHashesMetadataFileName),
+         dbValueFormat,
+         newHashFunc,
+     )
+     if err != nil {
+         return nil, err
+     }
+     defer pvtStateHashesWriter.close()
+ }
if err := pvtStateHashesWriter.addData(compositeKey, dbValue); err != nil {
return nil, err
}
default:
+ if pubStateWriter == nil { // encountered first time the pub state element
+     pubStateWriter, err = newSnapshotWriter(
Comment on lines +64 to +65
Contributor:
Here also, the public state would be non-empty even at block height 1, since we store the genesis block. The earlier code structure -- file creation, dumping data to the file, writing metadata, and closing it -- looked neater. If no data was dumped, that was reflected in the metadata.

If we still decide not to have empty files, having an IsEmpty() at each store would make this more explicit and neat, IMO.

Contributor (author):
The same response as in the comment above. Regarding IsEmpty, how would you handle it for the private state hashes?

+         filepath.Join(dir, pubStateDataFileName),
+         filepath.Join(dir, pubStateMetadataFileName),
+         dbValueFormat,
+         newHashFunc,
+     )
+     if err != nil {
+         return nil, err
+     }
+     defer pubStateWriter.close()
+ }
if err := pubStateWriter.addData(compositeKey, dbValue); err != nil {
return nil, err
}
}
}
- pubStateDataHash, pubStateMetadataHash, err := pubStateWriter.done()
- if err != nil {
-     return nil, err

Contributor:
Nit: could return early here too if both writers are nil (just for consistency with the other export APIs).

Contributor (author):
I'll leave this as is. You typically return early to shorten an if block; that is not applicable here.

+ snapshotFilesInfo := map[string][]byte{}

+ if pubStateWriter != nil {
+     pubStateDataHash, pubStateMetadataHash, err := pubStateWriter.done()
+     if err != nil {
+         return nil, err
+     }
+     snapshotFilesInfo[pubStateDataFileName] = pubStateDataHash
+     snapshotFilesInfo[pubStateMetadataFileName] = pubStateMetadataHash
+ }
- pvtStateHahshesDataHash, pvtStateHashesMetadataHash, err := pvtStateHashesWriter.done()
- if err != nil {
-     return nil, err

+ if pvtStateHashesWriter != nil {
+     pvtStateHahshesDataHash, pvtStateHashesMetadataHash, err := pvtStateHashesWriter.done()
+     if err != nil {
+         return nil, err
+     }
+     snapshotFilesInfo[pvtStateHashesFileName] = pvtStateHahshesDataHash
+     snapshotFilesInfo[pvtStateHashesMetadataFileName] = pvtStateHashesMetadataHash
+ }
- return map[string][]byte{
-     pubStateDataFileName:           pubStateDataHash,
-     pubStateMetadataFileName:       pubStateMetadataHash,
-     pvtStateHashesFileName:         pvtStateHahshesDataHash,
-     pvtStateHashesMetadataFileName: pvtStateHashesMetadataHash,
- },
- nil

+ return snapshotFilesInfo, nil
}

// snapshotWriter generates two files, a data file and a metadata file. The datafile contains a series of tuples <key, dbValue>
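On the recurring point that snapshot.metadata maps each exported file name to its hash: a consumer could verify that all listed files are present and intact with a check along the following lines. The function name and the in-memory map are assumptions for illustration; the actual metadata format may differ:

package main

import (
    "bytes"
    "crypto/sha256"
    "fmt"
    "os"
    "path/filepath"
)

// verifySnapshotFiles checks that every file listed in the metadata
// exists in dir and that its SHA-256 digest matches the recorded hash.
func verifySnapshotFiles(dir string, fileHashes map[string][]byte) error {
    for name, expected := range fileHashes {
        content, err := os.ReadFile(filepath.Join(dir, name))
        if err != nil {
            return fmt.Errorf("missing or unreadable snapshot file %s: %w", name, err)
        }
        actual := sha256.Sum256(content)
        if !bytes.Equal(actual[:], expected) {
            return fmt.Errorf("hash mismatch for snapshot file %s", name)
        }
    }
    return nil
}

func main() {
    // Hypothetical usage: fileHashes would come from the metadata file.
    err := verifySnapshotFiles("/tmp/snapshot", map[string][]byte{})
    fmt.Println(err)
}

This mirrors what verifyExportedTxIDs does in the tests above: read each file, hash it with SHA-256, and compare against the recorded digest.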