Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): implement its own GC method #2802

Merged
merged 36 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
86a372d
skeleton that rustc accepts
ailisp Jun 6, 2020
b459c16
replace first gc column
ailisp Jun 6, 2020
f6bbbdd
inc_gc
ailisp Jun 6, 2020
9c5ac42
inc_gc
ailisp Jun 6, 2020
4aef4da
missing +1
ailisp Jun 6, 2020
6c4d76f
make rustc happy again
ailisp Jun 6, 2020
1db3d37
add all other columns
ailisp Jun 8, 2020
a48d6ef
refactor to use chain_store_update.gc_col
ailisp Jun 10, 2020
64dbab7
fix tests
ailisp Jun 11, 2020
fd3bb2f
add checking gc col in test
ailisp Jun 11, 2020
61546f1
resolve conflict
ailisp Jun 11, 2020
c0e0f74
add gc column migration
ailisp Jun 11, 2020
afd8f0a
wabt-sys 0.7.2 failed to build with cmake 3.10, rollback to wabt-sys …
ailisp Jun 11, 2020
432f982
bump genesis version
ailisp Jun 12, 2020
547b2aa
Merge branch 'master' into db-col-gc-method
ailisp Jun 12, 2020
abf732e
remove genesis migration
ailisp Jun 12, 2020
c782790
bump storage version
ailisp Jun 12, 2020
b797951
bump storage version
ailisp Jun 12, 2020
86db77e
add info and test with master
ailisp Jun 12, 2020
89a4b7c
Merge branch 'master' into db-col-gc-method
Kouprin Jun 15, 2020
7fe3de6
remove double usage strum_macros::EnumIter
Kouprin Jun 15, 2020
80f444e
clean up comments
ailisp Jun 15, 2020
d2af587
return () from gc_col
ailisp Jun 16, 2020
c29eabe
BlockHeight
ailisp Jun 16, 2020
657ae8b
explicit no-op gc columns
ailisp Jun 16, 2020
bdd94ca
delete coltriechanges
ailisp Jun 16, 2020
e296745
special treat gc_col_state
ailisp Jun 16, 2020
b046369
assert col is in GC_COL
ailisp Jun 16, 2020
6dc423f
fix warning
ailisp Jun 16, 2020
8776ca8
Merge branch 'master' into db-col-gc-method
ailisp Jun 16, 2020
b143ca7
address frol and kouprin's comments
ailisp Jun 17, 2020
39dff99
type alias gccount, use vec instead of hashmap
ailisp Jun 17, 2020
cc80f35
address misha's comment
ailisp Jun 17, 2020
112c583
address misha's comment
ailisp Jun 17, 2020
a593777
address Kouprin's comment
ailisp Jun 18, 2020
a3803e7
Merge branch 'master' into db-col-gc-method
ailisp Jun 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use near_primitives::views::{
ExecutionOutcomeWithIdView, ExecutionStatusView, FinalExecutionOutcomeView,
FinalExecutionStatus, LightClientBlockView,
};
use near_store::{ColState, ColStateHeaders, ColStateParts, ShardTries, StoreUpdate};
use near_store::{ColState, ColStateHeaders, ColStateParts, ShardTries, StoreUpdate, GC_NOCACHE};

use crate::error::{Error, ErrorKind};
use crate::lightclient::get_epoch_block_producers_view;
Expand Down Expand Up @@ -945,7 +945,7 @@ impl Chain {
let mut store_update = StoreUpdate::new_with_tries(self.runtime_adapter.get_tries());
let stored_state = self.store().store().iter_prefix(ColState, &[]);
for (key, _) in stored_state {
store_update.delete(ColState, key.as_ref());
ColState.gc(&key.into(), &mut store_update, GC_NOCACHE);
}
let mut chain_store_update = self.mut_store().store_update();
chain_store_update.merge(store_update);
Expand Down Expand Up @@ -1790,7 +1790,7 @@ impl Chain {
let mut store_update = self.store.owned_store().store_update();
for part_id in 0..num_parts {
let key = StatePartKey(sync_hash, shard_id, part_id).try_to_vec()?;
store_update.delete(ColStateParts, &key);
ColStateParts.gc(&key.into(), &mut store_update, GC_NOCACHE);
}
Ok(store_update.commit()?)
}
Expand Down
154 changes: 103 additions & 51 deletions chain/chain/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use near_store::{
ColNextBlockHashes, ColNextBlockWithNewChunk, ColOutgoingReceipts, ColPartialChunks,
ColReceiptIdToShardId, ColStateChanges, ColStateDlInfos, ColStateHeaders, ColTransactionResult,
ColTransactions, ColTrieChanges, KeyForStateChanges, ShardTries, Store, StoreUpdate,
TrieChanges, WrappedTrieChanges, CHUNK_TAIL_KEY, HEADER_HEAD_KEY, HEAD_KEY,
TrieChanges, WrappedTrieChanges, CHUNK_TAIL_KEY, GC_NOCACHE, HEADER_HEAD_KEY, HEAD_KEY,
LARGEST_TARGET_HEIGHT_KEY, LATEST_KNOWN_KEY, SYNC_HEAD_KEY, TAIL_KEY,
};

Expand Down Expand Up @@ -1863,36 +1863,51 @@ impl<'a> ChainStoreUpdate<'a> {
debug_assert_eq!(chunk.header.inner.height_created, height);
// 1a. Delete from receipt_id_to_shard_id (ColReceiptIdToShardId)
ailisp marked this conversation as resolved.
Show resolved Hide resolved
for receipt in chunk.receipts {
store_update.delete(ColReceiptIdToShardId, receipt.receipt_id.as_ref());
self.chain_store
.receipt_id_to_shard_id
.cache_remove(&receipt.receipt_id.into());
ColReceiptIdToShardId.gc(
&receipt.receipt_id.into(),
&mut store_update,
Some(&mut self.chain_store.receipt_id_to_shard_id),
);
}
// 1b. Delete from ColTransactions
for transaction in chunk.transactions {
store_update.delete(ColTransactions, transaction.get_hash().as_ref());
self.chain_store.transactions.cache_remove(&transaction.get_hash().into());
ColTransactions.gc(
&transaction.get_hash().into(),
&mut store_update,
Some(&mut self.chain_store.transactions),
);
}

// 2. Delete chunk_hash-indexed data
let chunk_header_hash = chunk_hash.clone().into();
let chunk_header_hash_ref = chunk_hash.as_ref();
// 2a. Delete chunks (ColChunks)
store_update.delete(ColChunks, chunk_header_hash_ref);
self.chain_store.chunks.cache_remove(&chunk_header_hash);
ColChunks.gc(
&chunk_header_hash,
&mut store_update,
Some(&mut self.chain_store.chunks),
);
// 2b. Delete chunk extras (ColChunkExtra)
store_update.delete(ColChunkExtra, chunk_header_hash_ref);
self.chain_store.chunk_extras.cache_remove(&chunk_header_hash);
ColChunkExtra.gc(
&chunk_header_hash,
&mut store_update,
Some(&mut self.chain_store.chunk_extras),
);
// 2c. Delete partial_chunks (ColPartialChunks)
store_update.delete(ColPartialChunks, chunk_header_hash_ref);
self.chain_store.partial_chunks.cache_remove(&chunk_header_hash);
ColPartialChunks.gc(
&chunk_header_hash,
&mut store_update,
Some(&mut self.chain_store.partial_chunks),
);
// 2d. Delete invalid chunks (ColInvalidChunks)
store_update.delete(ColInvalidChunks, chunk_header_hash_ref);
self.chain_store.invalid_chunks.cache_remove(&chunk_header_hash);
ColInvalidChunks.gc(
&chunk_header_hash,
&mut store_update,
Some(&mut self.chain_store.invalid_chunks),
);
}
// 3. Delete chunks_tail-related data
// 3a. Delete from ColChunkHashesByHeight
store_update.delete(ColChunkHashesByHeight, &index_to_bytes(height));
ColChunkHashesByHeight.gc(&index_to_bytes(height), &mut store_update, GC_NOCACHE);
}
self.update_chunk_tail(min_chunk_height);
self.merge(store_update);
Expand All @@ -1919,6 +1934,13 @@ impl<'a> ChainStoreUpdate<'a> {
.map(|trie_changes: TrieChanges| {
tries
.revert_insertions(&trie_changes, shard_id, &mut store_update)
.map(|_| {
ColTrieChanges.gc(
&get_block_shard_id(&block_hash, shard_id).into(),
&mut store_update,
GC_NOCACHE,
)
})
ailisp marked this conversation as resolved.
Show resolved Hide resolved
.map_err(|err| ErrorKind::Other(err.to_string()))
})
.unwrap_or(Ok(()))?;
Expand All @@ -1932,6 +1954,13 @@ impl<'a> ChainStoreUpdate<'a> {
.map(|trie_changes: TrieChanges| {
tries
.apply_deletions(&trie_changes, shard_id, &mut store_update)
.map(|_| {
ColTrieChanges.gc(
&get_block_shard_id(&block_hash, shard_id).into(),
&mut store_update,
GC_NOCACHE,
)
})
.map_err(|err| ErrorKind::Other(err.to_string()))
})
.unwrap_or(Ok(()))?;
Expand Down Expand Up @@ -1961,62 +1990,74 @@ impl<'a> ChainStoreUpdate<'a> {

// 2. Delete shard_id-indexed data (shards, receipts, transactions)
for shard_id in 0..block.header.inner_rest.chunk_mask.len() as ShardId {
let height_shard_id = get_block_shard_id(&block_hash, shard_id);
// 2a. Delete outgoing receipts (ColOutgoingReceipts)
store_update.delete(ColOutgoingReceipts, &get_block_shard_id(&block_hash, shard_id));
self.chain_store
.outgoing_receipts
.cache_remove(&get_block_shard_id(&block_hash, shard_id));
ColOutgoingReceipts.gc(
&height_shard_id,
&mut store_update,
Some(&mut self.chain_store.outgoing_receipts),
);
// 2b. Delete incoming receipts (ColIncomingReceipts)
store_update.delete(ColIncomingReceipts, &get_block_shard_id(&block_hash, shard_id));
self.chain_store
.incoming_receipts
.cache_remove(&get_block_shard_id(&block_hash, shard_id));
ColIncomingReceipts.gc(
&height_shard_id,
&mut store_update,
Some(&mut self.chain_store.incoming_receipts),
);
// 2c. Delete from chunk_hash_per_height_shard (ColChunkPerHeightShard)
store_update.delete(ColChunkPerHeightShard, &get_height_shard_id(height, shard_id));
self.chain_store
.chunk_hash_per_height_shard
.cache_remove(&get_height_shard_id(height, shard_id));
ColChunkPerHeightShard.gc(
&height_shard_id,
&mut store_update,
Some(&mut self.chain_store.chunk_hash_per_height_shard),
);
// 2d. Delete from next_block_with_new_chunk (ColNextBlockWithNewChunk)
store_update
.delete(ColNextBlockWithNewChunk, &get_block_shard_id(&block_hash, shard_id));
self.chain_store
.next_block_with_new_chunk
.cache_remove(&get_block_shard_id(&block_hash, shard_id));
ColNextBlockWithNewChunk.gc(
&height_shard_id,
&mut store_update,
Some(&mut self.chain_store.next_block_with_new_chunk),
);
// 2e. Delete from ColStateHeaders
let key = StateHeaderKey(shard_id, block_hash).try_to_vec()?;
store_update.delete(ColStateHeaders, &key);
ColStateHeaders.gc(&key, &mut store_update, GC_NOCACHE);
// 2f. Delete from ColStateParts
// Already done, check chain.clear_downloaded_parts()
}

// 3. Delete block_hash-indexed data
let block_hash_ref = block_hash.as_ref();
let block_hash_vec: Vec<u8> = block_hash.as_ref().into();
// 3a. Delete block (ColBlock)
store_update.delete(ColBlock, block_hash_ref);
self.chain_store.blocks.cache_remove(&block_hash.into());
ColBlock.gc(&block_hash_vec, &mut store_update, Some(&mut self.chain_store.blocks));
// 3b. Delete block header (ColBlockHeader) - don't do because header sync needs headers
// 3c. Delete block extras (ColBlockExtra)
store_update.delete(ColBlockExtra, block_hash_ref);
self.chain_store.block_extras.cache_remove(&block_hash.into());
ColBlockExtra.gc(
&block_hash_vec,
&mut store_update,
Some(&mut self.chain_store.block_extras),
);
// 3d. Delete from next_block_hashes (ColNextBlockHashes)
store_update.delete(ColNextBlockHashes, block_hash_ref);
self.chain_store.next_block_hashes.cache_remove(&block_hash.into());
ColNextBlockHashes.gc(
&block_hash_vec,
&mut store_update,
Some(&mut self.chain_store.next_block_hashes),
);
// 3e. Delete from ColChallengedBlocks
store_update.delete(ColChallengedBlocks, block_hash_ref);
ColChallengedBlocks.gc(&block_hash_vec, &mut store_update, GC_NOCACHE);
// 3f. Delete from ColBlocksToCatchup
store_update.delete(ColBlocksToCatchup, block_hash_ref);
ColBlocksToCatchup.gc(&block_hash_vec, &mut store_update, GC_NOCACHE);
// 3g. Delete from KV state changes
let storage_key = KeyForStateChanges::get_prefix(&block_hash);
// 3g1. Collect all the keys whose prefix equals `block_hash`
let stored_state_changes =
self.chain_store.store().iter_prefix(ColStateChanges, storage_key.as_ref());
// 3g2. Remove from ColStateChanges all found State Changes
for (key, _) in stored_state_changes {
store_update.delete(ColStateChanges, key.as_ref());
ColStateChanges.gc(&key.into(), &mut store_update, GC_NOCACHE);
}
// 3h. Delete from ColBlockRefCount
store_update.delete(ColBlockRefCount, block_hash_ref);
self.chain_store.block_refcounts.cache_remove(&block_hash.into());
ColBlockRefCount.gc(
&block_hash_vec,
&mut store_update,
Some(&mut self.chain_store.block_refcounts),
);

match gc_mode {
GCMode::Fork(_) => {
Expand All @@ -2033,6 +2074,11 @@ impl<'a> ChainStoreUpdate<'a> {
&index_to_bytes(height),
&epoch_to_hashes,
)?;
ColBlockPerHeight.gc(
&vec![],
&mut store_update,
Some(&mut self.chain_store.block_hash_per_height),
);
ailisp marked this conversation as resolved.
Show resolved Hide resolved
self.chain_store
.block_hash_per_height
.cache_set(index_to_bytes(height), epoch_to_hashes);
Expand All @@ -2042,8 +2088,11 @@ impl<'a> ChainStoreUpdate<'a> {
GCMode::Canonical(_) => {
// 5. Canonical Chain clearing
// 5a. Delete blocks with current height (ColBlockPerHeight)
store_update.delete(ColBlockPerHeight, &index_to_bytes(height));
self.chain_store.block_hash_per_height.cache_remove(&index_to_bytes(height));
ColBlockPerHeight.gc(
&index_to_bytes(height),
&mut store_update,
Some(&mut self.chain_store.block_hash_per_height),
);
// 5b. Delete from ColBlockHeight - not done, because block sync needs it and genesis should remain accessible

// 6. Delete chunks and chunk-indexed data
Expand All @@ -2058,8 +2107,11 @@ impl<'a> ChainStoreUpdate<'a> {
GCMode::StateSync => {
// 7. State Sync clearing
// 7a. Delete blocks with current height (ColBlockPerHeight)
store_update.delete(ColBlockPerHeight, &index_to_bytes(height));
self.chain_store.block_hash_per_height.cache_remove(&index_to_bytes(height));
ColBlockPerHeight.gc(
&index_to_bytes(height),
&mut store_update,
Some(&mut self.chain_store.block_hash_per_height),
);
// 7b. Delete from ColBlockHeight - not done, because block sync needs it and genesis should remain accessible

// Chunks deleted separately
Expand Down
5 changes: 4 additions & 1 deletion core/store/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,12 @@ pub enum DBCol {
ColChunkHashesByHeight = 38,
/// Block ordinals.
ColBlockOrdinal = 39,
/// GC Count for each column
ColGCCount = 40,
}

// Do not move this line from enum DBCol
const NUM_COLS: usize = 40;
pub const NUM_COLS: usize = 41;

impl std::fmt::Display for DBCol {
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
Expand Down Expand Up @@ -133,6 +135,7 @@ impl std::fmt::Display for DBCol {
Self::ColBlockMerkleTree => "block merkle tree",
Self::ColChunkHashesByHeight => "chunk hashes indexed by height_created",
Self::ColBlockOrdinal => "block ordinal",
Self::ColGCCount => "gc count",
};
write!(formatter, "{}", desc)
}
Expand Down
Loading