Skip to content

Commit

Permalink
Merge pull request #8358 from dantengsky/feat-bloom-index-cache-config
Browse files Browse the repository at this point in the history
feat: config options for bloom filter cache
  • Loading branch information
BohuTANG authored Oct 21, 2022
2 parents d2a9f03 + db39093 commit d9a2aac
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 34 deletions.
18 changes: 12 additions & 6 deletions src/query/config/src/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,6 @@ pub struct QueryConfig {
pub max_query_log_size: usize,
/// Table cache enabled
pub table_cache_enabled: bool,
/// Max number of cached table snapshot
pub table_cache_snapshot_count: u64,
/// Max number of cached table segment
pub table_cache_segment_count: u64,
/// Max number of cached table block meta
pub table_cache_block_meta_count: u64,
/// Table memory cache size (mb)
Expand All @@ -142,6 +138,14 @@ pub struct QueryConfig {
pub table_disk_cache_root: String,
/// Table disk cache size (mb)
pub table_disk_cache_mb_size: u64,
/// Max number of cached table snapshot
pub table_cache_snapshot_count: u64,
/// Max number of cached table segment
pub table_cache_segment_count: u64,
/// Max number of cached bloom index meta objects
pub table_cache_bloom_index_meta_count: u64,
/// Max bytes of cached bloom index
pub table_cache_bloom_index_data_bytes: u64,
/// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv.
pub management_mode: bool,
pub jwt_key_file: String,
Expand Down Expand Up @@ -183,12 +187,14 @@ impl Default for QueryConfig {
wait_timeout_mills: 5000,
max_query_log_size: 10000,
table_cache_enabled: false,
table_cache_snapshot_count: 256,
table_cache_segment_count: 10240,
table_cache_block_meta_count: 102400,
table_memory_cache_mb_size: 256,
table_disk_cache_root: "_cache".to_string(),
table_disk_cache_mb_size: 1024,
table_cache_snapshot_count: 256,
table_cache_segment_count: 10240,
table_cache_bloom_index_meta_count: 3000,
table_cache_bloom_index_data_bytes: 1024 * 1024 * 1024,
management_mode: false,
jwt_key_file: "".to_string(),
async_insert_max_data_size: 10000,
Expand Down
36 changes: 24 additions & 12 deletions src/query/config/src/outer_v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -886,14 +886,6 @@ pub struct QueryConfig {
#[clap(long)]
pub table_cache_enabled: bool,

/// Max number of cached table snapshot
#[clap(long, default_value = "256")]
pub table_cache_snapshot_count: u64,

/// Max number of cached table segment
#[clap(long, default_value = "10240")]
pub table_cache_segment_count: u64,

/// Max number of cached table block meta
#[clap(long, default_value = "102400")]
pub table_cache_block_meta_count: u64,
Expand All @@ -910,6 +902,22 @@ pub struct QueryConfig {
#[clap(long, default_value = "1024")]
pub table_disk_cache_mb_size: u64,

/// Max number of cached table snapshot
#[clap(long, default_value = "256")]
pub table_cache_snapshot_count: u64,

/// Max number of cached table segment
#[clap(long, default_value = "10240")]
pub table_cache_segment_count: u64,

/// Max number of cached bloom index meta objects
#[clap(long, default_value = "3000")]
pub table_cache_bloom_index_meta_count: u64,

/// Max bytes of cached bloom index, default value is 1GB
#[clap(long, default_value = "1073741824")]
pub table_cache_bloom_index_data_bytes: u64,

/// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv.
#[clap(long)]
pub management_mode: bool,
Expand Down Expand Up @@ -975,12 +983,14 @@ impl TryInto<InnerQueryConfig> for QueryConfig {
wait_timeout_mills: self.wait_timeout_mills,
max_query_log_size: self.max_query_log_size,
table_cache_enabled: self.table_cache_enabled,
table_cache_snapshot_count: self.table_cache_snapshot_count,
table_cache_segment_count: self.table_cache_segment_count,
table_cache_block_meta_count: self.table_cache_block_meta_count,
table_memory_cache_mb_size: self.table_memory_cache_mb_size,
table_disk_cache_root: self.table_disk_cache_root,
table_disk_cache_mb_size: self.table_disk_cache_mb_size,
table_cache_snapshot_count: self.table_cache_snapshot_count,
table_cache_segment_count: self.table_cache_segment_count,
table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count,
table_cache_bloom_index_data_bytes: self.table_cache_bloom_index_data_bytes,
management_mode: self.management_mode,
jwt_key_file: self.jwt_key_file,
async_insert_max_data_size: self.async_insert_max_data_size,
Expand Down Expand Up @@ -1032,12 +1042,14 @@ impl From<InnerQueryConfig> for QueryConfig {
wait_timeout_mills: inner.wait_timeout_mills,
max_query_log_size: inner.max_query_log_size,
table_cache_enabled: inner.table_cache_enabled,
table_cache_snapshot_count: inner.table_cache_snapshot_count,
table_cache_segment_count: inner.table_cache_segment_count,
table_cache_block_meta_count: inner.table_cache_block_meta_count,
table_memory_cache_mb_size: inner.table_memory_cache_mb_size,
table_disk_cache_root: inner.table_disk_cache_root,
table_disk_cache_mb_size: inner.table_disk_cache_mb_size,
table_cache_snapshot_count: inner.table_cache_snapshot_count,
table_cache_segment_count: inner.table_cache_segment_count,
table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count,
table_cache_bloom_index_data_bytes: inner.table_cache_bloom_index_data_bytes,
management_mode: inner.management_mode,
jwt_key_file: inner.jwt_key_file,
async_insert_max_data_size: inner.async_insert_max_data_size,
Expand Down
67 changes: 62 additions & 5 deletions src/query/service/tests/it/configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,14 @@ database_engine_github_enabled = true
wait_timeout_mills = 5000
max_query_log_size = 10000
table_cache_enabled = false
table_cache_snapshot_count = 256
table_cache_segment_count = 10240
table_cache_block_meta_count = 102400
table_memory_cache_mb_size = 256
table_disk_cache_root = "_cache"
table_disk_cache_mb_size = 1024
table_cache_snapshot_count = 256
table_cache_segment_count = 10240
table_cache_bloom_index_meta_count = 3000
table_cache_bloom_index_data_bytes = 1073741824
management_mode = false
jwt_key_file = ""
async_insert_max_data_size = 10000
Expand Down Expand Up @@ -185,6 +187,13 @@ fn test_env_config_s3() -> Result<()> {
("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")),
("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")),
("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")),
("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")),
(
"TABLE_CACHE_BLOOM_INDEX_DATA_BYTES",
Some(format!("{}", 1024 * 1024 * 1024).as_str()),
),
("STORAGE_TYPE", Some("s3")),
("STORAGE_NUM_CPUS", Some("16")),
("STORAGE_FS_DATA_PATH", Some("/tmp/test")),
Expand Down Expand Up @@ -254,9 +263,13 @@ fn test_env_config_s3() -> Result<()> {
assert!(configured.query.table_engine_memory_enabled);

assert!(configured.query.table_cache_enabled);
assert_eq!(512, configured.query.table_memory_cache_mb_size);
assert_eq!("_cache_env", configured.query.table_disk_cache_root);
assert_eq!(512, configured.query.table_disk_cache_mb_size);
assert_eq!(10240, configured.query.table_cache_segment_count);
assert_eq!(256, configured.query.table_cache_snapshot_count);
assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count);
assert_eq!(
1024 * 1024 * 1024,
configured.query.table_cache_bloom_index_data_bytes
);
},
);

Expand Down Expand Up @@ -287,6 +300,13 @@ fn test_env_config_fs() -> Result<()> {
("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")),
("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")),
("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")),
("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")),
(
"TABLE_CACHE_BLOOM_INDEX_DATA_BYTES",
Some(format!("{}", 1024 * 1024 * 1024).as_str()),
),
("STORAGE_TYPE", Some("fs")),
("STORAGE_NUM_CPUS", Some("16")),
("STORAGE_FS_DATA_PATH", Some("/tmp/test")),
Expand Down Expand Up @@ -359,6 +379,13 @@ fn test_env_config_fs() -> Result<()> {
assert_eq!(512, configured.query.table_memory_cache_mb_size);
assert_eq!("_cache_env", configured.query.table_disk_cache_root);
assert_eq!(512, configured.query.table_disk_cache_mb_size);
assert_eq!(10240, configured.query.table_cache_segment_count);
assert_eq!(256, configured.query.table_cache_snapshot_count);
assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count);
assert_eq!(
1024 * 1024 * 1024,
configured.query.table_cache_bloom_index_data_bytes
);
},
);

Expand Down Expand Up @@ -388,6 +415,13 @@ fn test_env_config_gcs() -> Result<()> {
("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")),
("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")),
("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")),
("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")),
(
"TABLE_CACHE_BLOOM_INDEX_DATA_BYTES",
Some(format!("{}", 1024 * 1024 * 1024).as_str()),
),
("STORAGE_TYPE", Some("gcs")),
("STORAGE_NUM_CPUS", Some("16")),
("STORAGE_FS_DATA_PATH", Some("/tmp/test")),
Expand Down Expand Up @@ -467,6 +501,13 @@ fn test_env_config_gcs() -> Result<()> {
assert_eq!(512, configured.query.table_memory_cache_mb_size);
assert_eq!("_cache_env", configured.query.table_disk_cache_root);
assert_eq!(512, configured.query.table_disk_cache_mb_size);
assert_eq!(10240, configured.query.table_cache_segment_count);
assert_eq!(256, configured.query.table_cache_snapshot_count);
assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count);
assert_eq!(
1024 * 1024 * 1024,
configured.query.table_cache_bloom_index_data_bytes
);
},
);

Expand Down Expand Up @@ -496,6 +537,13 @@ fn test_env_config_oss() -> Result<()> {
("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")),
("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")),
("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")),
("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")),
("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")),
(
"TABLE_CACHE_BLOOM_INDEX_DATA_BYTES",
Some(format!("{}", 1024 * 1024 * 1024).as_str()),
),
("STORAGE_TYPE", Some("oss")),
("STORAGE_NUM_CPUS", Some("16")),
("STORAGE_FS_DATA_PATH", Some("/tmp/test")),
Expand Down Expand Up @@ -582,6 +630,13 @@ fn test_env_config_oss() -> Result<()> {
assert_eq!(512, configured.query.table_memory_cache_mb_size);
assert_eq!("_cache_env", configured.query.table_disk_cache_root);
assert_eq!(512, configured.query.table_disk_cache_mb_size);
assert_eq!(10240, configured.query.table_cache_segment_count);
assert_eq!(256, configured.query.table_cache_snapshot_count);
assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count);
assert_eq!(
1024 * 1024 * 1024,
configured.query.table_cache_bloom_index_data_bytes
);
},
);
Ok(())
Expand Down Expand Up @@ -634,6 +689,8 @@ table_cache_block_meta_count = 102400
table_memory_cache_mb_size = 256
table_disk_cache_root = "_cache"
table_disk_cache_mb_size = 1024
table_cache_bloom_index_meta_count = 3000
table_cache_bloom_index_data_bytes = 1073741824
management_mode = false
jwt_key_file = ""
async_insert_max_data_size = 10000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo
| query | rpc_tls_server_key | | |
| query | share_endpoint_address | | |
| query | table_cache_block_meta_count | 102400 | |
| query | table_cache_bloom_index_data_bytes | 1073741824 | |
| query | table_cache_bloom_index_meta_count | 3000 | |
| query | table_cache_enabled | false | |
| query | table_cache_segment_count | 10240 | |
| query | table_cache_snapshot_count | 256 | |
Expand Down
19 changes: 8 additions & 11 deletions src/query/storages/fuse-meta/src/caches/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,13 @@ use crate::caches::ItemCache;
use crate::caches::SegmentInfoCache;
use crate::caches::TableSnapshotCache;

// default number of index meta cached, default 3000 items
static DEFAULT_BLOOM_INDEX_META_CACHE_ITEMS: u64 = 3000;
// default size of cached bloom filter index (in bytes), 1G
static DEFAULT_BLOOM_INDEX_COLUMN_CACHE_SIZE: u64 = 1024 * 1024 * 1024;
// default number of file meta data cached, default 3000 items
static DEFAULT_FILE_META_DATA_CACHE_ITEMS: u64 = 3000;

/// Where all the caches reside
pub struct CacheManager {
table_snapshot_cache: Option<TableSnapshotCache>,
segment_info_cache: Option<SegmentInfoCache>,
bloom_index_cache: Option<BloomIndexCache>,
bloom_index_data_cache: Option<BloomIndexCache>,
bloom_index_meta_cache: Option<BloomIndexMetaCache>,
file_meta_data_cache: Option<FileMetaDataCache>,
cluster_id: String,
Expand All @@ -58,7 +53,7 @@ impl CacheManager {
v.init(Arc::new(Self {
table_snapshot_cache: None,
segment_info_cache: None,
bloom_index_cache: None,
bloom_index_data_cache: None,
bloom_index_meta_cache: None,
file_meta_data_cache: None,
cluster_id: config.cluster_id.clone(),
Expand All @@ -69,15 +64,17 @@ impl CacheManager {
} else {
let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count);
let segment_info_cache = Self::new_item_cache(config.table_cache_segment_count);
let bloom_index_cache = Self::new_bytes_cache(DEFAULT_BLOOM_INDEX_COLUMN_CACHE_SIZE);
let bloom_index_meta_cache = Self::new_item_cache(DEFAULT_BLOOM_INDEX_META_CACHE_ITEMS);
let bloom_index_data_cache =
Self::new_bytes_cache(config.table_cache_bloom_index_data_bytes);
let bloom_index_meta_cache =
Self::new_item_cache(config.table_cache_bloom_index_meta_count);

let file_meta_data_cache = Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS);

v.init(Arc::new(Self {
table_snapshot_cache,
segment_info_cache,
bloom_index_cache,
bloom_index_data_cache,
bloom_index_meta_cache,
file_meta_data_cache,
cluster_id: config.cluster_id.clone(),
Expand Down Expand Up @@ -106,7 +103,7 @@ impl CacheManager {
}

pub fn get_bloom_index_cache(&self) -> Option<BloomIndexCache> {
self.bloom_index_cache.clone()
self.bloom_index_data_cache.clone()
}

pub fn get_bloom_index_meta_cache(&self) -> Option<BloomIndexMetaCache> {
Expand Down

1 comment on commit d9a2aac

@vercel
Copy link

@vercel vercel bot commented on d9a2aac Oct 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-git-main-databend.vercel.app
databend.rs
databend-databend.vercel.app
databend.vercel.app

Please sign in to comment.