Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add vacuum impl #11320

Merged
merged 16 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ members = [
"src/query/storages/parquet",
"src/query/storages/result_cache",
"src/query/users",
"src/query/interface-manager",
# databend-query
"src/query/service",
# enterprise
Expand Down
1 change: 1 addition & 0 deletions src/binaries/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ common-config = { path = "../query/config" }
common-exception = { path = "../common/exception" }
common-expression = { path = "../query/expression" }
common-grpc = { path = "../common/grpc" }
common-license = { path = "../common/license" }
common-meta-api = { path = "../meta/api" }
common-meta-app = { path = "../meta/app" }
common-meta-client = { path = "../meta/client" }
Expand Down
4 changes: 4 additions & 0 deletions src/binaries/query/oss_main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use common_base::mem_allocator::GlobalAllocator;
use common_base::runtime::Runtime;
use common_config::InnerConfig;
use common_exception::Result;
use common_license::license_manager::LicenseManager;
use common_license::license_manager::OssLicenseManager;

use crate::entry::init_services;
use crate::entry::start_services;
Expand All @@ -47,5 +49,7 @@ fn main() {
async fn main_entrypoint() -> Result<()> {
let conf: InnerConfig = InnerConfig::load()?;
init_services(&conf).await?;
// init oss license manager
OssLicenseManager::init()?;
BohuTANG marked this conversation as resolved.
Show resolved Hide resolved
start_services(&conf).await
}
28 changes: 28 additions & 0 deletions src/common/license/src/license_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use std::sync::Arc;

use common_base::base::GlobalInstance;
use common_exception::ErrorCode;
use common_exception::Result;
use jwt_simple::claims::JWTClaims;

Expand Down Expand Up @@ -61,6 +62,33 @@ pub struct LicenseManagerWrapper {
// SAFETY: NOTE(review): the wrapper's fields are not visible from here, so the
// invariant that makes sharing it across threads sound is not established —
// confirm that the boxed manager it holds is itself safe to send and share.
unsafe impl Send for LicenseManagerWrapper {}
unsafe impl Sync for LicenseManagerWrapper {}

/// License manager used by the open-source binary: it never reports an active
/// license and refuses to produce license claims.
pub struct OssLicenseManager {}

impl LicenseManager for OssLicenseManager {
    /// Registers a `LicenseManagerWrapper` around a fresh `OssLicenseManager`
    /// with `GlobalInstance`.
    fn init() -> Result<()> {
        let wrapper = Arc::new(LicenseManagerWrapper {
            manager: Box::new(OssLicenseManager {}),
        });
        GlobalInstance::set(wrapper);
        Ok(())
    }

    /// Fetches the manager from `GlobalInstance`.
    ///
    /// NOTE(review): `init` registers an `Arc<LicenseManagerWrapper>`, while this
    /// requests an `Arc<Box<dyn LicenseManager>>` — confirm a value of that type
    /// is registered somewhere before this is called.
    fn instance() -> Arc<Box<dyn LicenseManager>> {
        GlobalInstance::get()
    }

    /// Always `false` for this manager.
    fn is_active(&self) -> bool {
        false
    }

    /// Always fails with `ErrorCode::LicenceDenied`; the raw input is ignored.
    fn make_license(_raw: &str) -> Result<JWTClaims<LicenseInfo>> {
        let denied = ErrorCode::LicenceDenied("Need Commercial License".to_string());
        Err(denied)
    }
}

/// Fetches the `Arc<LicenseManagerWrapper>` held by `GlobalInstance`.
pub fn get_license_manager() -> Arc<LicenseManagerWrapper> {
GlobalInstance::get()
}
1 change: 1 addition & 0 deletions src/query/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ Databend Query is a Distributed Query Engine at scale.
- [`streams`](./streams/) contains data sources and streams.
- [`users`](./users/), role-based access and control.
- [`ee`](ee/) contains enterprise functionalities.
- [`interface-manager`](interface-manager/) contains the enterprise interface manager.
10 changes: 10 additions & 0 deletions src/query/ee/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,20 @@ test = false
# Workspace dependencies
async-backtrace = { workspace = true }
common-base = { path = "../../common/base" }
common-catalog = { path = "../catalog" }
common-config = { path = "../config" }
common-exception = { path = "../../common/exception" }
common-license = { path = "../../common/license" }
common-storages-fuse = { path = "../storages/fuse" }
databend-query = { path = "../service" }
interface-manager = { path = "../interface-manager" }
storages-common-cache = { path = "../storages/common/cache" }
storages-common-table-meta = { path = "../storages/common/table-meta" }

async-trait = "0.1.57"
chrono = { workspace = true }
jwt-simple = "0.11.0"
tracing = "0.1.36"

[build-dependencies]
common-building = { path = "../../common/building" }
Expand Down
2 changes: 2 additions & 0 deletions src/query/ee/src/enterprise_services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ use common_config::InnerConfig;
use common_exception::Result;
use common_license::license_manager::LicenseManager;

use crate::interface::interface_mgr::RealInterfaceManager;
use crate::license::license_mgr::RealLicenseManager;

/// Entry point for setting up the services provided by this crate.
pub struct EnterpriseServices;

impl EnterpriseServices {
    /// Initializes the license manager first, then the interface manager.
    ///
    /// The configuration argument is currently unused. The first failing
    /// initializer's error is returned.
    #[async_backtrace::framed]
    pub async fn init(_config: InnerConfig) -> Result<()> {
        RealLicenseManager::init()?;
        RealInterfaceManager::init()
    }
}
49 changes: 49 additions & 0 deletions src/query/ee/src/interface/interface_mgr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2023 Databend Cloud
//
// Licensed under the Elastic License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.elastic.co/licensing/elastic-license
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use chrono::DateTime;
use chrono::Utc;
use common_base::base::GlobalInstance;
use common_catalog::table_context::TableContext;
use common_exception::Result;
use common_storages_fuse::FuseTable;
use interface_manager::InterfaceManager;
use interface_manager::InterfaceManagerWrapper;

use crate::storages::fuse::do_vacuum;

/// `InterfaceManager` implementation provided by this crate; it holds no state.
pub struct RealInterfaceManager {}

#[async_trait::async_trait]
impl InterfaceManager for RealInterfaceManager {
/// Delegates to the free function `crate::storages::fuse::do_vacuum`,
/// forwarding the table, context and retention time unchanged and returning
/// its result.
async fn do_vacuum(
&self,
fuse_table: &FuseTable,
ctx: Arc<dyn TableContext>,
retention_time: DateTime<Utc>,
) -> Result<()> {
do_vacuum(fuse_table, ctx, retention_time).await
}
}

impl RealInterfaceManager {
    /// Wraps a new `RealInterfaceManager` in an `InterfaceManagerWrapper` and
    /// registers it with `GlobalInstance`.
    pub fn init() -> Result<()> {
        let wrapper = InterfaceManagerWrapper::new(Box::new(Self {}));
        GlobalInstance::set(Arc::new(wrapper));
        Ok(())
    }
}
16 changes: 16 additions & 0 deletions src/query/ee/src/interface/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2023 Databend Cloud
//
// Licensed under the Elastic License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.elastic.co/licensing/elastic-license
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod interface_mgr;
pub use interface_mgr::RealInterfaceManager;
2 changes: 2 additions & 0 deletions src/query/ee/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@
// limitations under the License.

pub mod enterprise_services;
pub mod interface;
pub mod license;
pub mod storages;
1 change: 1 addition & 0 deletions src/query/ee/src/license/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
// limitations under the License.

pub mod license_mgr;
pub use license_mgr::RealLicenseManager;
15 changes: 15 additions & 0 deletions src/query/ee/src/storages/fuse/io/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright 2023 Databend Cloud
//
// Licensed under the Elastic License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.elastic.co/licensing/elastic-license
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod snapshots;
103 changes: 103 additions & 0 deletions src/query/ee/src/storages/fuse/io/snapshots.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2023 Databend Cloud
//
// Licensed under the Elastic License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.elastic.co/licensing/elastic-license
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Read all the segments referenced by all the snapshot files.
// limit: limits the number of snapshot files listed

use std::sync::Arc;
use std::time::Instant;

use common_exception::Result;
use common_storages_fuse::io::SnapshotLiteExtended;
use common_storages_fuse::io::SnapshotsIO;
use storages_common_table_meta::meta::Location;
use tracing::info;

#[allow(clippy::too_many_arguments)]
#[async_backtrace::framed]
pub async fn get_snapshot_referenced_segments<T>(
BohuTANG marked this conversation as resolved.
Show resolved Hide resolved
snapshots_io: &SnapshotsIO,
root_snapshot_location: String,
root_snapshot_lite: Arc<SnapshotLiteExtended>,
status_callback: T,
) -> Result<Option<Vec<Location>>>
where
T: Fn(String),
{
let ctx = snapshots_io.get_ctx();

// List all the snapshot file paths
// note that snapshot file paths of ongoing txs might be included
let mut snapshot_files = vec![];
if let Some(prefix) = SnapshotsIO::get_s3_prefix_from_file(&root_snapshot_location) {
snapshot_files =
SnapshotsIO::list_files(snapshots_io.get_operator(), &prefix, None).await?;
}

if snapshot_files.is_empty() {
return Ok(None);
}

// 1. Get all the snapshot by chunks, save all the segments location.
let max_io_requests = ctx.get_settings().get_max_storage_io_requests()? as usize;

let start = Instant::now();
let mut count = 1;
// 2. Get all the referenced segments
let mut segments = vec![];
// first save root snapshot segments
root_snapshot_lite.segments.iter().for_each(|location| {
segments.push(location.to_owned());
});
for chunk in snapshot_files.chunks(max_io_requests) {
// Since we want to get all the snapshot referenced files, so set `ignore_timestamp` true
let results = snapshots_io
.read_snapshot_lite_extends(chunk, root_snapshot_lite.clone(), true)
.await?;

results
.into_iter()
.flatten()
.for_each(|snapshot_lite_extend| {
snapshot_lite_extend.segments.iter().for_each(|location| {
segments.push(location.to_owned());
});
});

// Refresh status.
{
count += chunk.len();
let status = format!(
"gc orphan: read snapshot files:{}/{}, segment files: {}, cost:{} sec",
count,
snapshot_files.len(),
segments.len(),
start.elapsed().as_secs()
);
info!(status);
(status_callback)(status);
}
}

Ok(Some(segments))
}

/// Lists the files found under the prefix that
/// `SnapshotsIO::get_s3_prefix_from_file` derives from `input_file`.
///
/// Yields an empty list when no prefix can be derived; listing errors are
/// returned to the caller.
#[async_backtrace::framed]
async fn get_files_by_prefix(snapshots_io: &SnapshotsIO, input_file: &str) -> Result<Vec<String>> {
    match SnapshotsIO::get_s3_prefix_from_file(input_file) {
        Some(prefix) => SnapshotsIO::list_files(snapshots_io.get_operator(), &prefix, None).await,
        None => Ok(Vec::new()),
    }
}
Loading