Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add size properties collector #2097

Merged
merged 12 commits into from
Aug 7, 2017
9 changes: 7 additions & 2 deletions src/bin/tikv-server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ use tikv::util::collections::HashMap;
use tikv::util::logger::{self, StderrLogger};
use tikv::util::file_log::RotatingFileLogger;
use tikv::util::transport::SendCh;
use tikv::util::properties::MvccPropertiesCollectorFactory;
use tikv::util::properties::{MvccPropertiesCollectorFactory, SizePropertiesCollectorFactory};
use tikv::server::{DEFAULT_LISTENING_ADDR, DEFAULT_CLUSTER_ID, Server, Node, Config,
create_raft_storage};
use tikv::server::transport::ServerRaftStoreRouter;
Expand Down Expand Up @@ -587,7 +587,10 @@ fn get_rocksdb_default_cf_option(config: &toml::Value, total_mem: u64) -> Column
default_values.whole_key_filtering = true;
default_values.compaction_pri = 3;

get_rocksdb_cf_option(config, "defaultcf", default_values)
let mut cf_opts = get_rocksdb_cf_option(config, "defaultcf", default_values);
let f = Box::new(SizePropertiesCollectorFactory::default());
cf_opts.add_table_properties_collector_factory("tikv.size-properties-collector", f);
cf_opts
}

fn get_rocksdb_write_cf_option(config: &toml::Value, total_mem: u64) -> ColumnFamilyOptions {
Expand All @@ -608,6 +611,8 @@ fn get_rocksdb_write_cf_option(config: &toml::Value, total_mem: u64) -> ColumnFa
// Collects user defined properties.
let f = Box::new(MvccPropertiesCollectorFactory::default());
cf_opts.add_table_properties_collector_factory("tikv.mvcc-properties-collector", f);
let f = Box::new(SizePropertiesCollectorFactory::default());
cf_opts.add_table_properties_collector_factory("tikv.size-properties-collector", f);
cf_opts
}

Expand Down
185 changes: 184 additions & 1 deletion src/util/properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
// limitations under the License.

use std::cmp;
use std::collections::HashMap;
use std::collections::{HashMap, BTreeMap};
use std::collections::Bound::{Included, Unbounded};
use std::u64;
use std::io::Read;

use storage::mvcc::{Write, WriteType};
use storage::types;
Expand All @@ -30,6 +32,9 @@ const PROP_NUM_PUTS: &'static str = "tikv.num_puts";
const PROP_NUM_VERSIONS: &'static str = "tikv.num_versions";
const PROP_MAX_ROW_VERSIONS: &'static str = "tikv.max_row_versions";
const PROP_NUM_ERRORS: &'static str = "tikv.num_errors";
// Property name under which `SizeProperties::total_size` is encoded.
const PROP_TOTAL_SIZE: &'static str = "tikv.total_size";
// Property name under which the `SizeProperties` index handles are encoded.
const PROP_SIZE_INDEX: &'static str = "tikv.size_index";
// Number of accumulated key + value bytes (4 MiB) after which
// `SizePropertiesCollector::add` records a new index handle.
const PROP_SIZE_INDEX_DISTANCE: u64 = 4 * 1024 * 1024;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if the SST target file size is smaller than 4 MB?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we collect only the start key and the end key.


#[derive(Clone, Debug, Default)]
pub struct MvccProperties {
Expand Down Expand Up @@ -167,6 +172,101 @@ impl TablePropertiesCollectorFactory for MvccPropertiesCollectorFactory {
}
}

/// A sample point recorded by a properties collector, stored in a map keyed
/// by the key at which the sample was taken.
///
/// As filled in by `SizePropertiesCollector`, `size` is the number of
/// key + value bytes accumulated since the previously recorded handle and
/// `offset` is the number of key + value bytes accumulated since the first
/// entry, both including the entry at the handle's key.
#[derive(Clone, Debug, Default)]
pub struct IndexHandle {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you mentioned, how to add rows informations in this struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, we may have a RowsProperties like:

struct RowsProperties {
    total_rows: u64,
    index_handles: BTreeMap<Vec<u8>, IndexHandle>,
}

It needs to encode the index_handles too, so I put the encode_handles() in a common place.
Besides, I prefer to put those encode/decode in one place.

pub size: u64, // The size of the stored block
pub offset: u64, // The offset of the block in the file
}

/// Size statistics collected for one table.
///
/// `Clone` and `Debug` are derived so this public type matches the other
/// property structs in this module and can be logged or copied by callers.
#[derive(Clone, Debug, Default)]
pub struct SizeProperties {
    /// Total number of key + value bytes fed to the collector.
    pub total_size: u64,
    /// Sampled handles keyed by the key at which each sample was recorded.
    pub index_handles: BTreeMap<Vec<u8>, IndexHandle>,
}

impl SizeProperties {
/// Serializes these properties into a fresh `UserProperties`: the total
/// size goes under `PROP_TOTAL_SIZE` and the index handles under
/// `PROP_SIZE_INDEX`.
pub fn encode(&self) -> UserProperties {
    let mut encoded = UserProperties::new();
    let handles = &self.index_handles;
    encoded.encode_u64(PROP_TOTAL_SIZE, self.total_size);
    encoded.encode_handles(PROP_SIZE_INDEX, handles);
    encoded
}

pub fn decode<T: DecodeProperties>(props: &T) -> Result<SizeProperties> {
let mut res = SizeProperties::default();
res.total_size = try!(props.decode_u64(PROP_TOTAL_SIZE));
res.index_handles = try!(props.decode_handles(PROP_SIZE_INDEX));
Ok(res)
}

/// Estimates the number of bytes between `start` and `end` from the sampled
/// index handles.
///
/// Each bound is mapped to the first handle whose key is greater than or
/// equal to it, and the result is the difference between the two handles'
/// offsets. Returns 0 when no handle key is `>= start`. When no handle key
/// is `>= end`, the last handle is used as the end point.
///
/// # Panics
///
/// Panics if the handle chosen for `end` has a smaller offset than the one
/// chosen for `start`.
// NOTE(review): that looks reachable when `start` sorts after `end` and they
// map to different handles — confirm callers always pass an ordered range.
pub fn get_approximate_size_in_range(&self, start: &[u8], end: &[u8]) -> u64 {
// First handle at or after `start`.
let mut range = self.index_handles.range::<[u8], _>((Included(start), Unbounded));
let start_offset = match range.next() {
Some((_, v)) => v.offset,
None => return 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any test to cover None here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, see test cases here.

};
// First handle at or after `end`.
let mut range = self.index_handles.range::<[u8], _>((Included(end), Unbounded));
let end_offset = match range.next() {
Some((_, v)) => v.offset,
None => {
// The last handle must exist because a start offset was found above.
let (_, v) = self.index_handles.iter().last().unwrap();
v.offset
}
};
assert!(end_offset >= start_offset);
end_offset - start_offset
}
}

/// Table properties collector that builds a `SizeProperties` from the
/// entries it is fed.
pub struct SizePropertiesCollector {
// Properties accumulated so far; encoded and returned by `finish`.
props: SizeProperties,
// Most recently added key; empty until the first entry has been added.
last_key: Vec<u8>,
// Running handle: `offset` counts every byte seen so far, `size` counts the
// bytes seen since the last handle was recorded.
index_handle: IndexHandle,
}

impl SizePropertiesCollector {
    /// Creates a collector with default (empty) properties, an empty last
    /// key and a zeroed running handle.
    fn new() -> SizePropertiesCollector {
        let props = SizeProperties::default();
        let index_handle = IndexHandle::default();
        SizePropertiesCollector {
            props,
            last_key: Vec::new(),
            index_handle,
        }
    }
}

impl TablePropertiesCollector for SizePropertiesCollector {
fn add(&mut self, key: &[u8], value: &[u8], _: DBEntryType, _: u64, _: u64) {
let size = key.len() + value.len();
self.index_handle.size += size as u64;
self.index_handle.offset += size as u64;
// Add the start key for convenience.
if self.last_key.is_empty() || self.index_handle.size >= PROP_SIZE_INDEX_DISTANCE {
self.props.index_handles.insert(key.to_owned(), self.index_handle.clone());
self.index_handle.size = 0;
}
self.last_key.clear();
self.last_key.extend_from_slice(key);
}

fn finish(&mut self) -> HashMap<Vec<u8>, Vec<u8>> {
self.props.total_size = self.index_handle.offset;
if self.index_handle.size > 0 {
self.props.index_handles.insert(self.last_key.clone(), self.index_handle.clone());
}
self.props.encode().0
}
}

/// Stateless factory that produces `SizePropertiesCollector` instances.
#[derive(Default)]
pub struct SizePropertiesCollectorFactory {}

impl TablePropertiesCollectorFactory for SizePropertiesCollectorFactory {
    /// Returns a freshly constructed, boxed `SizePropertiesCollector`; the
    /// `u32` argument is ignored.
    fn create_table_properties_collector(&mut self, _: u32) -> Box<TablePropertiesCollector> {
        let collector = SizePropertiesCollector::new();
        Box::new(collector)
    }
}

/// Newtype over the raw byte-keyed, byte-valued property map that a
/// collector's `finish` returns.
// NOTE(review): presumably keyed by property name — confirm against `encode`.
pub struct UserProperties(HashMap<Vec<u8>, Vec<u8>>);

impl UserProperties {
Expand All @@ -183,6 +283,18 @@ impl UserProperties {
buf.encode_u64(value).unwrap();
self.encode(name, buf);
}

/// Serializes every `(key, handle)` pair of `handles` into a single buffer, in
/// the map's iteration order, and passes that buffer to `self.encode` under
/// `name`.
// Format: | klen | k | v.size | v.offset |
pub fn encode_handles(&mut self, name: &str, handles: &BTreeMap<Vec<u8>, IndexHandle>) {
Copy link
Member

@zhangjinpeng87 zhangjinpeng87 Aug 1, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is not belongs to here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because IndexHandle is a concept of SizeProperties.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, it can be a common struct in the future. For example, we may add handles for rows count later.

// 1024 is only the initial capacity; the buffer grows as records are appended.
let mut buf = Vec::with_capacity(1024);
for (k, v) in handles {
// The key is length-prefixed so the decoder knows how many bytes to read back.
// NOTE(review): the `unwrap`s presumably cannot fail when writing to a `Vec` — confirm.
buf.encode_u64(k.len() as u64).unwrap();
buf.extend(k);
buf.encode_u64(v.size).unwrap();
buf.encode_u64(v.offset).unwrap();
}
self.encode(name, buf);
}
}

pub trait DecodeProperties {
Expand All @@ -192,6 +304,21 @@ pub trait DecodeProperties {
let mut buf = try!(self.decode(k));
buf.decode_u64()
}

fn decode_handles(&self, k: &str) -> Result<BTreeMap<Vec<u8>, IndexHandle>> {
let mut res = BTreeMap::new();
let mut buf = try!(self.decode(k));
while !buf.is_empty() {
let klen = try!(buf.decode_u64());
let mut k = vec![0; klen as usize];
try!(buf.read_exact(&mut k));
let mut v = IndexHandle::default();
v.size = try!(buf.decode_u64());
v.offset = try!(buf.decode_u64());
res.insert(k, v);
}
Ok(res)
}
}

impl DecodeProperties for UserProperties {
Expand Down Expand Up @@ -248,4 +375,60 @@ mod tests {
assert_eq!(props.num_versions, 7);
assert_eq!(props.max_row_versions, 3);
}

// Feeds a fixed sequence of single-byte keys with chosen value lengths to the
// collector, then checks the decoded total size, the recorded handles and a
// set of approximate range-size queries.
#[test]
fn test_size_properties_collector() {
// Each case is (key, value length); an entry contributes 1 + value length bytes.
let cases = [("a", 0),
// handle "a": size = 1, offset = 1,
("b", PROP_SIZE_INDEX_DISTANCE / 8),
("c", PROP_SIZE_INDEX_DISTANCE / 4),
("d", PROP_SIZE_INDEX_DISTANCE / 2),
("e", PROP_SIZE_INDEX_DISTANCE / 8),
// handle "e": size = DISTANCE + 4, offset = DISTANCE + 5
("f", PROP_SIZE_INDEX_DISTANCE / 4),
("g", PROP_SIZE_INDEX_DISTANCE / 2),
("h", PROP_SIZE_INDEX_DISTANCE / 8),
("i", PROP_SIZE_INDEX_DISTANCE / 4),
// handle "i": size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + 9
("j", PROP_SIZE_INDEX_DISTANCE / 2),
("k", PROP_SIZE_INDEX_DISTANCE)];
// handle "k": size = DISTANCE / 8 * 12 + 2, offset = DISTANCE / 8 * 29 + 11

let mut collector = SizePropertiesCollector::new();
for &(k, vlen) in &cases {
let v = vec![0; vlen as usize];
collector.add(k.as_bytes(), &v, DBEntryType::Put, 0, 0);
}
let result = UserProperties(collector.finish());

// Round-trip through encode/decode before checking the contents.
let props = SizeProperties::decode(&result).unwrap();
assert_eq!(props.total_size, PROP_SIZE_INDEX_DISTANCE / 8 * 29 + 11);
let handles = &props.index_handles;
assert_eq!(handles.len(), 4);
let a = &handles[b"a".as_ref()];
assert_eq!(a.size, 1);
assert_eq!(a.offset, 1);
let e = &handles[b"e".as_ref()];
assert_eq!(e.size, PROP_SIZE_INDEX_DISTANCE + 4);
assert_eq!(e.offset, PROP_SIZE_INDEX_DISTANCE + 5);
let i = &handles[b"i".as_ref()];
assert_eq!(i.size, PROP_SIZE_INDEX_DISTANCE / 8 * 9 + 4);
assert_eq!(i.offset, PROP_SIZE_INDEX_DISTANCE / 8 * 17 + 9);
let k = &handles[b"k".as_ref()];
assert_eq!(k.size, PROP_SIZE_INDEX_DISTANCE / 8 * 12 + 2);
assert_eq!(k.offset, PROP_SIZE_INDEX_DISTANCE / 8 * 29 + 11);

// Each case is (start, end, expected size). " " sorts before every handle key
// and "z" after every handle key, so they exercise both fallbacks: a start
// past the last handle yields 0, and an end past the last handle uses the
// last handle's offset.
let cases = [(" ", "z", k.offset - a.offset),
(" ", " ", 0),
("z", "z", 0),
("a", "k", k.offset - a.offset),
("a", "i", i.offset - a.offset),
("e", "h", i.offset - e.offset),
("g", "h", 0),
("g", "g", 0)];
for &(start, end, size) in &cases {
assert_eq!(props.get_approximate_size_in_range(start.as_bytes(), end.as_bytes()),
size);
}
}
}