6 changes: 3 additions & 3 deletions be/src/agent/task_worker_pool.cpp
@@ -1296,9 +1296,9 @@ void push_storage_policy_callback(StorageEngine& engine, const TAgentTaskRequest
? resource.hdfs_storage_param.root_path
: "";
if (existed_resource.fs == nullptr) {
-            st = io::HdfsFileSystem::create(resource.hdfs_storage_param,
-                                            std::to_string(resource.id), root_path, nullptr,
-                                            &fs);
+            st = io::HdfsFileSystem::create(
+                    resource.hdfs_storage_param, std::to_string(resource.id),
+                    resource.hdfs_storage_param.fs_name, nullptr, &fs, std::move(root_path));
} else {
fs = std::static_pointer_cast<io::HdfsFileSystem>(existed_resource.fs);
}
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
@@ -1381,6 +1381,8 @@ DEFINE_String(test_s3_prefix, "prefix");
#endif
// clang-format on

+DEFINE_Bool(enable_root_path_of_hdfs_resource, "false");

std::map<std::string, Register::Field>* Register::_s_field_map = nullptr;
std::map<std::string, std::function<bool()>>* RegisterConfValidator::_s_field_validator = nullptr;
std::map<std::string, std::string>* full_conf_map = nullptr;
5 changes: 5 additions & 0 deletions be/src/common/config.h
@@ -1436,6 +1436,11 @@ DECLARE_mInt32(compaction_num_per_round);
// Enable sleep 5s between delete cumulative compaction.
DECLARE_mBool(enable_sleep_between_delete_cumu_compaction);

+// Previously the root_path of an HDFS resource was passed as an empty string,
+// which was incorrect. This config is added for compatibility, to guarantee
+// that the configured root_path takes effect as expected.
+DECLARE_Bool(enable_root_path_of_hdfs_resource);

// whether to prune rows with delete sign = 1 in base compaction
// ATTN: this config is only for test
DECLARE_mBool(enable_prune_delete_sign_when_base_compaction);
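To make the flag's effect concrete, here is a minimal sketch of how a resource's root_path shapes remote tablet paths. The `<root_path>/data/<tablet_id>` layout is taken from the regression test below; `remote_tablet_dir` is an illustrative helper, not the actual BE code:

#include <cstdint>
#include <string>

// Illustrative only: with enable_root_path_of_hdfs_resource=false the root
// collapses to "", which is the old (incorrect) behavior this flag guards.
std::string remote_tablet_dir(const std::string& root_path, int64_t tablet_id) {
    return root_path + "/data/" + std::to_string(tablet_id);
}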
11 changes: 7 additions & 4 deletions be/src/io/fs/hdfs_file_system.cpp
@@ -123,21 +123,24 @@ Status HdfsFileHandleCache::get_file(const std::shared_ptr<HdfsFileSystem>& fs,

Status HdfsFileSystem::create(const THdfsParams& hdfs_params, std::string id,
const std::string& fs_name, RuntimeProfile* profile,
-                              std::shared_ptr<HdfsFileSystem>* fs) {
+                              std::shared_ptr<HdfsFileSystem>* fs, std::string root_path) {
#ifdef USE_HADOOP_HDFS
if (!config::enable_java_support) {
return Status::InternalError(
"hdfs file system is not enabled, you can change be config enable_java_support to "
"true.");
}
#endif
-    (*fs).reset(new HdfsFileSystem(hdfs_params, std::move(id), fs_name, profile));
+    (*fs).reset(
+            new HdfsFileSystem(hdfs_params, std::move(id), fs_name, profile, std::move(root_path)));
return (*fs)->connect();
}

HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, std::string id,
-                               const std::string& fs_name, RuntimeProfile* profile)
-        : RemoteFileSystem("", std::move(id), FileSystemType::HDFS),
+                               const std::string& fs_name, RuntimeProfile* profile,
+                               std::string root_path)
+        : RemoteFileSystem(config::enable_root_path_of_hdfs_resource ? std::move(root_path) : "",
+                           std::move(id), FileSystemType::HDFS),
_hdfs_params(hdfs_params),
_fs_handle(nullptr),
_profile(profile) {
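For reviewers, a minimal sketch of a call site using the new signature; `params`, `resource_id`, and the literal root path are placeholders, while the parameter order matches the declaration in hdfs_file_system.h below:

#include "io/fs/hdfs_file_system.h"

// Hypothetical caller: builds an HDFS file system rooted at the resource's
// root_path instead of the previous hard-coded "".
Status make_fs(const THdfsParams& params, int64_t resource_id,
               std::shared_ptr<io::HdfsFileSystem>* fs) {
    return io::HdfsFileSystem::create(
            params,                       // Thrift HDFS connection params
            std::to_string(resource_id),  // file system id
            params.fs_name,               // fs_name, e.g. "hdfs://nameservice1"
            nullptr,                      // RuntimeProfile* is optional here
            fs,
            "/user/doris/cooldown");      // root_path; defaults to "" for old callers
}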
5 changes: 3 additions & 2 deletions be/src/io/fs/hdfs_file_system.h
@@ -97,7 +97,8 @@ class HdfsFileHandleCache;
class HdfsFileSystem final : public RemoteFileSystem {
public:
static Status create(const THdfsParams& hdfs_params, std::string id, const std::string& path,
-                         RuntimeProfile* profile, std::shared_ptr<HdfsFileSystem>* fs);
+                         RuntimeProfile* profile, std::shared_ptr<HdfsFileSystem>* fs,
+                         std::string root_path = "");

~HdfsFileSystem() override;

@@ -130,7 +131,7 @@ class HdfsFileSystem final : public RemoteFileSystem {
private:
friend class HdfsFileWriter;
HdfsFileSystem(const THdfsParams& hdfs_params, std::string id, const std::string& path,
-                   RuntimeProfile* profile);
+                   RuntimeProfile* profile, std::string root_path);
const THdfsParams& _hdfs_params;
std::string _fs_name;
std::shared_ptr<HdfsFileSystemHandle> _fs_handle = nullptr;
regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -49,6 +49,7 @@ import org.apache.doris.regression.util.JdbcUtils
import org.apache.doris.regression.util.Hdfs
import org.apache.doris.regression.util.SuiteUtils
import org.apache.doris.regression.util.DebugPoint
+import org.apache.hadoop.fs.FileSystem
import org.junit.jupiter.api.Assertions

import org.slf4j.Logger
@@ -92,6 +93,7 @@ class Suite implements GroovyInterceptable {
final List<Future> lazyCheckFutures = new Vector<>()

private AmazonS3 s3Client = null
+    private FileSystem fs = null

Suite(String name, String group, SuiteContext context, SuiteCluster cluster) {
this.name = name
@@ -843,6 +845,16 @@ class Suite implements GroovyInterceptable {
return enableHdfs.equals("true");
}

+    synchronized FileSystem getHdfs() {
+        if (fs == null) {
+            String hdfsFs = context.config.otherConfigs.get("hdfsFs")
+            String hdfsUser = context.config.otherConfigs.get("hdfsUser")
+            Hdfs hdfs = new Hdfs(hdfsFs, hdfsUser, context.config.dataPath + "/")
+            fs = hdfs.fs
+        }
+        return fs
+    }

String uploadToHdfs(String localFile) {
// as group can be rewrite the origin data file not relate to group
String dataDir = context.config.dataPath + "/"
2 changes: 2 additions & 0 deletions regression-test/pipeline/p0/conf/be.conf
@@ -97,3 +97,5 @@ cold_data_compaction_interval_sec=60

# This feature has bug, so by default is false, only open it in pipeline to observe
enable_parquet_page_index=true

+enable_root_path_of_hdfs_resource=true
@@ -0,0 +1,129 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.apache.hadoop.fs.Path
import java.util.function.Supplier

suite("test_cold_data_compaction_by_hdfs") {

if (!enableHdfs()) {
logger.info("Skip this case, because HDFS is not available")
return
}

def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure ->
long start = System.currentTimeMillis()
while (true) {
if (closure.get()) {
return
} else {
if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
throw new RuntimeException("" +
"Operation timeout, maybe you need to check " +
"remove_unused_remote_files_interval_sec and " +
"cold_data_compaction_interval_sec in be.conf")
} else {
sleep(10_000)
}
}
}
}

String suffix = UUID.randomUUID().hashCode().abs().toString()
String prefix = "${getHdfsDataDir()}/regression/cold_data_compaction"
multi_sql """
DROP TABLE IF EXISTS t_recycle_in_hdfs;
DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
DROP RESOURCE IF EXISTS 'remote_hdfs_${suffix}';
CREATE RESOURCE "remote_hdfs_${suffix}"
PROPERTIES
(
"type"="hdfs",
"fs.defaultFS"="${getHdfsFs()}",
"hadoop.username"="${getHdfsUser()}",
"hadoop.password"="${getHdfsPasswd()}",
"root_path"="${prefix}"
);
CREATE STORAGE POLICY test_policy_${suffix}
PROPERTIES(
"storage_resource" = "remote_hdfs_${suffix}",
"cooldown_ttl" = "5"
);
CREATE TABLE IF NOT EXISTS t_recycle_in_hdfs
(
k1 BIGINT,
k2 LARGEINT,
v1 VARCHAR(2048)
)
DISTRIBUTED BY HASH (k1) BUCKETS 1
PROPERTIES(
"storage_policy" = "test_policy_${suffix}",
"disable_auto_compaction" = "true",
"replication_num" = "1"
);
"""

// insert 5 RowSets
multi_sql """
insert into t_recycle_in_hdfs values(1, 1, 'Tom');
insert into t_recycle_in_hdfs values(2, 2, 'Jelly');
insert into t_recycle_in_hdfs values(3, 3, 'Spike');
insert into t_recycle_in_hdfs values(4, 4, 'Tyke');
insert into t_recycle_in_hdfs values(5, 5, 'Tuffy');
"""

    // wait until files are uploaded to HDFS
retryUntilTimeout(1800, {
def res = sql_return_maparray "show data from t_recycle_in_hdfs"
String size = ""
String remoteSize = ""
for (final def line in res) {
if ("t_recycle_in_hdfs".equals(line.TableName)) {
size = line.Size
remoteSize = line.RemoteSize
break
}
}
logger.info("waiting for data to be uploaded to HDFS: t_recycle_in_hdfs's local data size: ${size}, remote data size: ${remoteSize}")
return size.startsWith("0") && !remoteSize.startsWith("0")
})

String tabletId = sql_return_maparray("show tablets from t_recycle_in_hdfs")[0].TabletId
// check number of remote files
def filesBeforeCompaction = getHdfs().listStatus(new Path(prefix + "/data/${tabletId}"))

// 5 RowSets + 1 meta
assertEquals(6, filesBeforeCompaction.size())

// trigger cold data compaction
sql """alter table t_recycle_in_hdfs set ("disable_auto_compaction" = "false")"""

// wait until compaction finish
retryUntilTimeout(1800, {
def filesAfterCompaction = getHdfs().listStatus(new Path(prefix + "/data/${tabletId}"))
logger.info("t_recycle_in_hdfs's remote file number is ${filesAfterCompaction.size()}")
// 1 RowSet + 1 meta
return filesAfterCompaction.size() == 2
})

sql "drop table t_recycle_in_hdfs force"
retryUntilTimeout(1800, {
def pathExists = getHdfs().exists(new Path(prefix + "/data/${tabletId}"))
logger.info("after drop t_recycle_in_hdfs, the remote file path ${pathExists ? "exists" : "not exists"}")
return !pathExists
})
}