Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions be/src/cloud/cloud_meta_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,15 @@ Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos, bool
j->mutable_obj_info()->set_sk(j->obj_info().sk().substr(0, 2) + "xxx");
}

for (int i = 0; i < resp.obj_info_size(); ++i) {
resp.mutable_obj_info(i)->set_ak(hide_access_key(resp.obj_info(i).sk()));
}
for (int i = 0; i < resp.storage_vault_size(); ++i) {
auto* j = resp.mutable_storage_vault(i);
if (!j->has_obj_info()) continue;
j->mutable_obj_info()->set_sk(hide_access_key(j->obj_info().sk()));
}

LOG(INFO) << "get storage vault, enable_storage_vault=" << *is_vault_mode
<< " response=" << resp.ShortDebugString();
return Status::OK();
Expand Down
26 changes: 26 additions & 0 deletions be/src/util/s3_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,4 +574,30 @@ S3Conf S3Conf::get_s3_conf(const TS3StorageParam& param) {
return ret;
}

// Produces a masked copy of an access key that is suitable for log output.
//
// Up to six characters in the middle of the key stay readable and everything
// around them is overwritten with 'x':
//   - more than 7 characters : 6 characters stay readable
//   - 3 to 7 characters      : all but 2 characters stay readable
//   - 2 characters or fewer  : nothing stays readable
// When the number of masked characters is odd, the extra 'x' goes on the left.
std::string hide_access_key(const std::string& ak) {
    const size_t total = ak.size();

    // Number of characters that remain readable.
    size_t visible = 0;
    if (total > 7) {
        visible = 6;
    } else if (total > 2) {
        visible = total - 2;
    }

    const size_t masked = total - visible;
    const size_t head = (masked + 1) / 2; // left side takes the odd one out
    const size_t tail = masked - head;

    // head + visible + tail == total, so the result has the same length as `ak`.
    return std::string(head, 'x') + ak.substr(head, visible) + std::string(tail, 'x');
}

} // end namespace doris
8 changes: 5 additions & 3 deletions be/src/util/s3_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ extern bvar::LatencyRecorder s3_get_bucket_version_latency;
extern bvar::LatencyRecorder s3_copy_object_latency;
}; // namespace s3_bvar

// Returns a copy of `ak` masked for logging: at most six characters in the
// middle stay readable and the characters around them are replaced with 'x'.
// Keys of two characters or fewer are masked completely. The result always has
// the same length as the input.
std::string hide_access_key(const std::string& ak);

class S3URI;
struct S3ClientConf {
std::string endpoint;
Expand Down Expand Up @@ -107,9 +109,9 @@ struct S3ClientConf {
"(ak={}, token={}, endpoint={}, region={}, bucket={}, max_connections={}, "
"request_timeout_ms={}, connect_timeout_ms={}, use_virtual_addressing={}, "
"cred_provider_type={},role_arn={}, external_id={}",
ak, token, endpoint, region, bucket, max_connections, request_timeout_ms,
connect_timeout_ms, use_virtual_addressing, cred_provider_type, role_arn,
external_id);
hide_access_key(ak), token, endpoint, region, bucket, max_connections,
request_timeout_ms, connect_timeout_ms, use_virtual_addressing, cred_provider_type,
role_arn, external_id);
}
};

Expand Down
79 changes: 79 additions & 0 deletions be/test/util/s3_util_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "util/s3_util.h"

#include <gtest/gtest-test-part.h>

#include <string>

#include "gtest/gtest_pred_impl.h"
#include "util/s3_uri.h"

namespace doris {

// Fixture for the helpers declared in util/s3_util.h. It carries no state; it
// only groups the hide_access_key cases under one test-suite name.
class S3UTILTest : public testing::Test {
public:
    S3UTILTest() = default;
    ~S3UTILTest() override = default;
}; // end class S3UTILTest

// --- Keys of two characters or fewer: nothing stays readable. ---

TEST_F(S3UTILTest, hide_access_key_empty) {
    EXPECT_EQ("", hide_access_key(""));
}

TEST_F(S3UTILTest, hide_access_key_single_char) {
    EXPECT_EQ("x", hide_access_key("A"));
}

TEST_F(S3UTILTest, hide_access_key_two_chars) {
    EXPECT_EQ("xx", hide_access_key("AB"));
}

// --- Keys of 3..7 characters: exactly one character is masked on each side. ---

TEST_F(S3UTILTest, hide_access_key_three_chars) {
    EXPECT_EQ("xBx", hide_access_key("ABC"));
}

TEST_F(S3UTILTest, hide_access_key_four_chars) {
    EXPECT_EQ("xBCx", hide_access_key("ABCD"));
}

TEST_F(S3UTILTest, hide_access_key_six_chars) {
    EXPECT_EQ("xBCDEx", hide_access_key("ABCDEF"));
}

TEST_F(S3UTILTest, hide_access_key_seven_chars) {
    EXPECT_EQ("xBCDEFx", hide_access_key("ABCDEFG"));
}

// --- Keys longer than 7 characters: six characters in the middle stay readable. ---

// First length at which the visible window is capped at six characters.
TEST_F(S3UTILTest, hide_access_key_eight_chars) {
    EXPECT_EQ("xBCDEFGx", hide_access_key("ABCDEFGH"));
}

// Odd number of masked characters: the extra 'x' goes on the left.
TEST_F(S3UTILTest, hide_access_key_nine_chars) {
    EXPECT_EQ("xxCDEFGHx", hide_access_key("ABCDEFGHI"));
}

TEST_F(S3UTILTest, hide_access_key_normal_length) {
    EXPECT_EQ("xxxDEFGHIxxx", hide_access_key("ABCDEFGHIJKL"));
}

TEST_F(S3UTILTest, hide_access_key_long_key) {
    std::string long_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
    std::string result = hide_access_key(long_key);
    EXPECT_EQ("xxxxxxxxxxxxxxxPQRSTUxxxxxxxxxxxxxxx", result);
}

TEST_F(S3UTILTest, hide_access_key_typical_aws_key) {
    std::string aws_key = "AKIAIOSFODNN7EXAMPLE";
    std::string result = hide_access_key(aws_key);
    EXPECT_EQ("xxxxxxxFODNN7xxxxxxx", result);
}

} // end namespace doris
26 changes: 26 additions & 0 deletions cloud/src/meta-service/meta_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3112,4 +3112,30 @@ void MetaServiceImpl::get_schema_dict(::google::protobuf::RpcController* control
response->mutable_schema_dict()->Swap(&schema_dict);
}

// Produces a masked copy of an access key that is suitable for log output.
//
// Up to six characters in the middle of the key stay readable and everything
// around them is overwritten with 'x':
//   - more than 7 characters : 6 characters stay readable
//   - 3 to 7 characters      : all but 2 characters stay readable
//   - 2 characters or fewer  : nothing stays readable
// When the number of masked characters is odd, the extra 'x' goes on the left.
//
// The thresholds and rounding are kept identical to the backend helper of the
// same name in be/src/util/s3_util.cpp, so a given key is masked the same way
// in both services' logs.
std::string hide_access_key(const std::string& ak) {
    std::string key = ak;
    const size_t key_len = key.length();

    // Number of characters that remain readable.
    size_t reserved_count = 0;
    if (key_len > 7) {
        reserved_count = 6;
    } else if (key_len > 2) {
        reserved_count = key_len - 2;
    }

    const size_t x_count = key_len - reserved_count;
    const size_t left_x_count = (x_count + 1) / 2; // round up: left side gets the odd 'x'
    const size_t right_x_count = x_count - left_x_count;

    if (left_x_count > 0) {
        key.replace(0, left_x_count, left_x_count, 'x');
    }
    if (right_x_count > 0) {
        key.replace(key_len - right_x_count, right_x_count, right_x_count, 'x');
    }
    return key;
}

} // namespace doris::cloud
84 changes: 61 additions & 23 deletions cloud/src/meta-service/meta_service_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,43 +54,72 @@ inline std::string md5(const std::string& str) {
return ss.str();
}

// Returns a copy of `ak` masked for logging: at most six characters stay
// readable and the remaining leading/trailing characters are replaced with 'x'.
// Keys of two characters or fewer are masked completely. The result always has
// the same length as the input.
std::string hide_access_key(const std::string& ak);

// Rewrites, in place, the quoted value that follows each occurrence of
// `pattern` in `str`.
//
// For every match of `pattern` the function looks for the next ':' at or after
// the match, then for the next '"' after that colon, and treats the text up to
// the following '"' as the value:
//   - is_handle_sk == true  : the value becomes "md5: " + md5(value)
//   - is_handle_sk == false : the value becomes hide_access_key(value)
// Matches that are not followed by a ':' and a complete quoted value are left
// untouched.
//
// NOTE: the scan is purely textual. `pattern` is matched as a plain substring
// and backslash-escaped quotes inside a value are not recognised.
inline void process_ak_sk_pattern(std::string& str, const std::string& pattern, bool is_handle_sk) {
    if (pattern.empty()) {
        // find("") matches at every position without advancing, which would spin forever.
        return;
    }

    size_t pos = 0;
    while ((pos = str.find(pattern, pos)) != std::string::npos) {
        const size_t colon_pos = str.find(':', pos);
        if (colon_pos == std::string::npos) {
            pos += pattern.length();
            continue;
        }

        const size_t quote_pos = str.find('\"', colon_pos);
        if (quote_pos == std::string::npos) {
            pos += pattern.length();
            continue;
        }

        const size_t value_start = quote_pos + 1;
        const size_t value_end = str.find('\"', value_start);
        if (value_end == std::string::npos) {
            // Unterminated value: nothing to rewrite, keep scanning after the opening quote.
            pos = value_start;
            continue;
        }

        const std::string original_value = str.substr(value_start, value_end - value_start);
        const std::string replacement =
                is_handle_sk ? "md5: " + md5(original_value) : hide_access_key(original_value);

        str.replace(value_start, value_end - value_start, replacement);

        // Resume at the closing quote of the rewritten value. The replacement may
        // be shorter or longer than the original, so the old `value_end` offset no
        // longer points there.
        pos = value_start + replacement.length();
    }
}

/**
 * Replaces every "sk" value in the given string with its MD5 hash.
 *
 * Two spellings of the field are handled:
 *   - `sk: "value"`   (e.g. the output of DebugString()/ShortDebugString())
 *   - `"sk": "value"` (e.g. a JSON request body)
 *
 * Assumptions:
 * - Each "sk" value is enclosed in double quotes.
 * - An md5() function exists that takes a std::string and returns its MD5 hash as a string.
 *
 * @param debug_string Input string containing `sk: ` or `"sk"` fields to be encrypted.
 * @return A new string with all "sk" values replaced by "md5: " followed by their MD5 hashes.
 *
 * Behavior:
 * 1. Searches for all occurrences of `sk: ` and of `"sk"` in the input string.
 * 2. For each occurrence, extracts the value between double quotes.
 * 3. Replaces the original value with "md5: " followed by its MD5 hash.
 * 4. Returns the modified string with all "sk" values encrypted.
 */
inline std::string encryt_sk(std::string debug_string) {
    // Each value is rewritten exactly once: the two patterns cannot match the
    // same occurrence, and a rewritten value ("md5: " + hash) matches neither.
    process_ak_sk_pattern(debug_string, "sk: ", true);
    process_ak_sk_pattern(debug_string, "\"sk\"", true);

    return debug_string;
}

// Masks every "ak" value in `debug_string` with hide_access_key() and returns
// the result. Both the `ak: "value"` and the `"ak": "value"` spellings are
// processed, in that order.
inline std::string hide_ak(std::string debug_string) {
    const char* const ak_patterns[] = {"ak: ", "\"ak\""};
    for (const char* ak_pattern : ak_patterns) {
        process_ak_sk_pattern(debug_string, ak_pattern, false);
    }
    return debug_string;
}

Expand Down Expand Up @@ -134,6 +163,13 @@ void begin_rpc(std::string_view func_name, brpc::Controller* ctrl, const Request
<< " lock_id=" << req->lock_id() << " initiator=" << req->initiator()
<< " expiration=" << req->expiration()
<< " require_compaction_stats=" << req->require_compaction_stats();
} else if constexpr (std::is_same_v<Request, CreateInstanceRequest> ||
std::is_same_v<Request, CreateStageRequest>) {
std::string debug_string = encryt_sk(req->ShortDebugString());
debug_string = hide_ak(debug_string);
TEST_SYNC_POINT_CALLBACK("sk_begin_rpc", &debug_string);
LOG(INFO) << "begin " << func_name << " remote_caller=" << ctrl->remote_side()
<< " original_client_ip=" << req->request_ip() << " request=" << debug_string;
} else {
LOG(INFO) << "begin " << func_name << " remote_caller=" << ctrl->remote_side()
<< " original_client_ip=" << req->request_ip()
Expand Down Expand Up @@ -179,8 +215,10 @@ void finish_rpc(std::string_view func_name, brpc::Controller* ctrl, const Reques
<< " original_client_ip=" << req->request_ip()
<< " status=" << res->status().ShortDebugString();
} else if constexpr (std::is_same_v<Response, GetObjStoreInfoResponse> ||
std::is_same_v<Response, GetStageResponse>) {
std::is_same_v<Response, GetStageResponse> ||
std::is_same_v<Response, GetInstanceResponse>) {
std::string debug_string = encryt_sk(res->DebugString());
debug_string = hide_ak(debug_string);
TEST_SYNC_POINT_CALLBACK("sk_finish_rpc", &debug_string);
LOG(INFO) << "finish " << func_name << " remote_caller=" << ctrl->remote_side()
<< " original_client_ip=" << req->request_ip() << " response=" << debug_string;
Expand Down
11 changes: 7 additions & 4 deletions cloud/src/meta-service/meta_service_http.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "common/configbase.h"
#include "common/logging.h"
#include "common/string_util.h"
#include "meta-service/meta_service_helper.h"
#include "meta-store/keys.h"
#include "meta-store/txn_kv.h"
#include "meta-store/txn_kv_error.h"
Expand All @@ -63,7 +64,7 @@ namespace doris::cloud {
auto st = parse_json_message(unresolved_path, body, &req); \
if (!st.ok()) { \
std::string msg = "parse http request '" + unresolved_path + "': " + st.ToString(); \
LOG_WARNING(msg).tag("body", body); \
LOG_WARNING(msg).tag("body", encryt_sk(body)); \
return http_json_reply(MetaServiceCode::PROTOBUF_PARSE_ERR, msg); \
} \
} while (0)
Expand All @@ -86,7 +87,7 @@ static google::protobuf::util::Status parse_json_message(const std::string& unre
if (!st.ok()) {
std::string msg = "failed to strictly parse http request for '" + unresolved_path +
"' error: " + st.ToString();
LOG_WARNING(msg).tag("body", body);
LOG_WARNING(msg).tag("body", encryt_sk(hide_access_key(body)));

// ignore unknown fields
google::protobuf::util::JsonParseOptions json_parse_options;
Expand Down Expand Up @@ -776,6 +777,8 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller,
LOG(INFO) << "rpc from " << cntl->remote_side()
<< " request: " << cntl->http_request().uri().path();
std::string http_request = format_http_request(cntl);
std::string http_request_for_log = encryt_sk(http_request);
http_request_for_log = hide_ak(http_request_for_log);

// Auth
auto token = http_query(cntl->http_request().uri(), "token");
Expand All @@ -786,7 +789,7 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller,
cntl->response_attachment().append(body);
cntl->response_attachment().append("\n");
LOG(WARNING) << "failed to handle http from " << cntl->remote_side()
<< " request: " << http_request << " msg: " << body;
<< " request: " << http_request_for_log << " msg: " << body;
return;
}

Expand All @@ -806,7 +809,7 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller,
int ret = cntl->http_response().status_code();
LOG(INFO) << (ret == 200 ? "succ to " : "failed to ") << __PRETTY_FUNCTION__ << " "
<< cntl->remote_side() << " request=\n"
<< http_request << "\n ret=" << ret << " msg=" << msg;
<< http_request_for_log << "\n ret=" << ret << " msg=" << msg;
}

} // namespace doris::cloud
Loading
Loading