diff --git a/.github/licenserc.yml b/.github/licenserc.yml index 2eebf892ae0..f9323be8d29 100644 --- a/.github/licenserc.yml +++ b/.github/licenserc.yml @@ -39,6 +39,7 @@ header: - 'Cargo.lock' - 'Cargo.toml' - 'rust-toolchain' + - 'rust-toolchain.toml' - '.devcontainer/' - '**/OWNERS' - 'OWNERS_ALIASES' diff --git a/contrib/client-c b/contrib/client-c index 0a763f5f33e..099157bf7d9 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 0a763f5f33e342802d0e188e8544540a015e808d +Subproject commit 099157bf7d9e20eac73a38c5490467b80427761f diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 7dc50b4eb06..b8c00d3953f 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 7dc50b4eb06124e31f03adb06c20ff7ab61c5f79 +Subproject commit b8c00d3953fc847a4958cec69b69ad5aa45c8a6d diff --git a/contrib/tiflash-proxy-cmake/CMakeLists.txt b/contrib/tiflash-proxy-cmake/CMakeLists.txt index c752f1c3ebe..29123c59257 100644 --- a/contrib/tiflash-proxy-cmake/CMakeLists.txt +++ b/contrib/tiflash-proxy-cmake/CMakeLists.txt @@ -87,7 +87,7 @@ add_custom_command(OUTPUT ${_TIFLASH_PROXY_LIBRARY} WORKING_DIRECTORY ${_TIFLASH_PROXY_SOURCE_DIR} DEPENDS "${_TIFLASH_PROXY_SRCS}" "${_TIFLASH_PROXY_SOURCE_DIR}/Cargo.lock" - "${TiFlash_SOURCE_DIR}/rust-toolchain") + "${TiFlash_SOURCE_DIR}/rust-toolchain.toml") add_custom_target(tiflash_proxy ALL DEPENDS ${_TIFLASH_PROXY_LIBRARY}) add_library(libtiflash_proxy SHARED IMPORTED GLOBAL) diff --git a/dbms/src/Common/ArrayCache.h b/dbms/src/Common/ArrayCache.h index 6457a60e1b0..ecfa75b82a1 100644 --- a/dbms/src/Common/ArrayCache.h +++ b/dbms/src/Common/ArrayCache.h @@ -378,8 +378,6 @@ class ArrayCache : private boost::noncopyable { --left_it; - //std::cerr << "left_it->isFree(): " << left_it->isFree() << "\n"; - if (left_it->chunk == region.chunk && left_it->isFree()) { region.size += left_it->size; @@ -392,8 +390,6 @@ class ArrayCache : private boost::noncopyable ++right_it; if 
(right_it != adjacency_list.end()) { - //std::cerr << "right_it->isFree(): " << right_it->isFree() << "\n"; - if (right_it->chunk == region.chunk && right_it->isFree()) { region.size += right_it->size; @@ -402,8 +398,6 @@ class ArrayCache : private boost::noncopyable } } - //std::cerr << "size is enlarged: " << was_size << " -> " << region.size << "\n"; - size_multimap.insert(region); } @@ -433,10 +427,6 @@ class ArrayCache : private boost::noncopyable if (lru_list.empty()) return nullptr; - /*for (const auto & elem : adjacency_list) - std::cerr << (!elem.SizeMultimapHook::is_linked() ? "\033[1m" : "") << elem.size << (!elem.SizeMultimapHook::is_linked() ? "\033[0m " : " "); - std::cerr << '\n';*/ - auto it = adjacency_list.iterator_to(lru_list.front()); while (true) @@ -544,8 +534,6 @@ class ArrayCache : private boost::noncopyable return allocateFromFreeRegion(*free_region, size); } - // std::cerr << "Requested size: " << size << "\n"; - /// Evict something from cache and continue. while (true) { diff --git a/dbms/src/Common/FieldVisitors.cpp b/dbms/src/Common/FieldVisitors.cpp index bf1f238a12f..6ca3dfd1b78 100644 --- a/dbms/src/Common/FieldVisitors.cpp +++ b/dbms/src/Common/FieldVisitors.cpp @@ -221,57 +221,121 @@ String FieldVisitorToString::operator()(const Tuple & x_def) const String FieldVisitorToDebugString::operator()(const Null &) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return "NULL"; + case RedactMode::Disable: + return "NULL"; + case RedactMode::Marker: + return Redact::toMarkerString("NULL", /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const UInt64 & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + 
case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const Int64 & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const Float64 & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatFloat(x); + case RedactMode::Disable: + return formatFloat(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatFloat(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const String & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + // The string may contains utf-8 char that need to be escaped + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ false); + } } String FieldVisitorToDebugString::operator()(const DecimalField & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String 
FieldVisitorToDebugString::operator()(const DecimalField & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const DecimalField & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const DecimalField & x) const { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - return formatQuoted(x); + case RedactMode::Disable: + return formatQuoted(x); + case RedactMode::Marker: + return Redact::toMarkerString(formatQuoted(x), /*ignore_escape*/ true); + } } String FieldVisitorToDebugString::operator()(const Array & x) const diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index b761f520d8d..a4f0fe3be03 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -34,13 +34,6 @@ #include -#ifdef DBMS_HASH_MAP_DEBUG_RESIZES -#include - -#include -#include -#endif - /** NOTE HashTable could only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. 
@@ -487,9 +480,6 @@ class HashTable if unlikely (!resize_callback()) throw DB::ResizeException("Error in hash table resize"); } -#ifdef DBMS_HASH_MAP_DEBUG_RESIZES - Stopwatch watch; -#endif size_t old_size = grower.bufSize(); @@ -568,12 +558,6 @@ class HashTable if (&buf[i] != &buf[updated_place_value]) Cell::move(&buf[i], &buf[updated_place_value]); } - -#ifdef DBMS_HASH_MAP_DEBUG_RESIZES - watch.stop(); - std::cerr << std::fixed << std::setprecision(3) << "Resize from " << old_size << " to " << grower.bufSize() - << " took " << watch.elapsedSeconds() << " sec." << std::endl; -#endif } diff --git a/dbms/src/Common/OptimizedRegularExpression.inl.h b/dbms/src/Common/OptimizedRegularExpression.inl.h index c3bbaf45a39..d8bcfb7fedd 100644 --- a/dbms/src/Common/OptimizedRegularExpression.inl.h +++ b/dbms/src/Common/OptimizedRegularExpression.inl.h @@ -277,13 +277,6 @@ void OptimizedRegularExpressionImpl::analyze( required_substring = trivial_substrings.front().first; required_substring_is_prefix = trivial_substrings.front().second == 0; } - - /* std::cerr - << "regexp: " << regexp - << ", is_trivial: " << is_trivial - << ", required_substring: " << required_substring - << ", required_substring_is_prefix: " << required_substring_is_prefix - << std::endl;*/ } diff --git a/dbms/src/Common/RedactHelpers.cpp b/dbms/src/Common/RedactHelpers.cpp index dd8e8e38b12..e28eeff7fdb 100644 --- a/dbms/src/Common/RedactHelpers.cpp +++ b/dbms/src/Common/RedactHelpers.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -19,22 +20,110 @@ #include #include +#include -std::atomic<bool> Redact::REDACT_LOG = false; +std::atomic<RedactMode> Redact::REDACT_LOG = RedactMode::Disable; -void Redact::setRedactLog(bool v) +void Redact::setRedactLog(RedactMode v) { - pingcap::Redact::setRedactLog(v); // set redact flag for client-c + switch (v) + { + case RedactMode::Enable: + pingcap::Redact::setRedactLog(pingcap::RedactMode::Enable); + break; + case RedactMode::Disable: + pingcap::Redact::setRedactLog(pingcap::RedactMode::Disable); + break; + case RedactMode::Marker: + pingcap::Redact::setRedactLog(pingcap::RedactMode::Marker); + break; + } Redact::REDACT_LOG.store(v, std::memory_order_relaxed); } +std::string Redact::toMarkerString(const std::string & raw, bool ignore_escape) +{ + // A shortcut for callers that ensure `raw` cannot contain any char that + // needs to be escaped. + if (likely(ignore_escape)) + return fmt::format("‹{}›", raw); + + constexpr static size_t BEGIN_SIZE = std::string_view("‹").size(); + constexpr static size_t END_SIZE = std::string_view("›").size(); + enum class EscapeMark + { + Begin, + End, + }; + // must be an ordered map, + std::map<size_t, EscapeMark> found_pos; + std::string::size_type pos = 0; + do + { + pos = raw.find("‹", pos); + if (pos == std::string::npos) + break; + found_pos.emplace(pos, EscapeMark::Begin); + pos += BEGIN_SIZE; + } while (pos != std::string::npos && pos < raw.size()); + pos = 0; + do + { + pos = raw.find("›", pos); + if (pos == std::string::npos) + break; + found_pos.emplace(pos, EscapeMark::End); + pos += END_SIZE; + } while (pos != std::string::npos && pos < raw.size()); + if (likely(found_pos.empty())) + { + // A shortcut for detecting that nothing to be escaped.
+ return fmt::format("‹{}›", raw); + } + + // Escape the chars in `raw` to `fmt_buf` + DB::FmtBuffer fmt_buf; + fmt_buf.append("‹"); + pos = 0; // the copy pos from `raw` + for (const auto & [to_escape_pos, to_escape_type] : found_pos) + { + switch (to_escape_type) + { + case EscapeMark::Begin: + { + fmt_buf.append(std::string_view(raw.c_str() + pos, to_escape_pos - pos + BEGIN_SIZE)); + fmt_buf.append("‹"); // append for escape + pos = to_escape_pos + BEGIN_SIZE; // move the copy begin pos from `raw` + break; + } + case EscapeMark::End: + { + fmt_buf.append(std::string_view(raw.c_str() + pos, to_escape_pos - pos + END_SIZE)); + fmt_buf.append("›"); // append for escape + pos = to_escape_pos + END_SIZE; // move the copy begin pos from `raw` + break; + } + } + } + // handle the suffix + if (pos < raw.size()) + fmt_buf.append(std::string_view(raw.c_str() + pos, raw.size() - pos)); + fmt_buf.append("›"); + return fmt_buf.toString(); +} + std::string Redact::handleToDebugString(int64_t handle) { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - - // Encode as string - return DB::toString(handle); + case RedactMode::Disable: + // Encode as string + return DB::toString(handle); + case RedactMode::Marker: + // Note: the `handle` must be int64 so we don't need to care + // about escaping here. 
+ return toMarkerString(DB::toString(handle), /*ignore_escape*/ true); + } } std::string Redact::keyToHexString(const char * key, size_t size) @@ -52,29 +141,44 @@ std::string Redact::keyToHexString(const char * key, size_t size) std::string Redact::keyToDebugString(const char * key, const size_t size) { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: return "?"; - - return Redact::keyToHexString(key, size); + case RedactMode::Disable: + // Encode as string + return Redact::keyToHexString(key, size); + case RedactMode::Marker: + // Note: the `s` must be hexadecimal string so we don't need to care + // about escaping here. + return toMarkerString(Redact::keyToHexString(key, size), /*ignore_escape*/ true); + } } void Redact::keyToDebugString(const char * key, const size_t size, std::ostream & oss) { - if (Redact::REDACT_LOG.load(std::memory_order_relaxed)) + const auto v = Redact::REDACT_LOG.load(std::memory_order_relaxed); + switch (v) + { + case RedactMode::Enable: { oss << "?"; return; } - - // Encode as upper hex string - const auto flags = oss.flags(); - oss << std::uppercase << std::setfill('0') << std::hex; - for (size_t i = 0; i < size; ++i) + case RedactMode::Disable: { - // width need to be set for each output (https://stackoverflow.com/questions/405039/permanent-stdsetw) - oss << std::setw(2) << static_cast(static_cast(key[i])); + oss << Redact::keyToHexString(key, size); + return; + } + case RedactMode::Marker: + { + // Note: the `s` must be hexadecimal string so we don't need to care + // about escaping here. 
+ oss << toMarkerString(Redact::keyToHexString(key, size), /*ignore_escape*/ true); + return; + } } - oss.flags(flags); // restore flags } std::string Redact::hexStringToKey(const char * start, size_t len) @@ -92,4 +196,4 @@ std::string Redact::hexStringToKey(const char * start, size_t len) s.push_back(x); } return s; -} \ No newline at end of file +} diff --git a/dbms/src/Common/RedactHelpers.h b/dbms/src/Common/RedactHelpers.h index ae6e64ebeab..d99620e7b00 100644 --- a/dbms/src/Common/RedactHelpers.h +++ b/dbms/src/Common/RedactHelpers.h @@ -22,10 +22,17 @@ namespace DB class FieldVisitorToDebugString; } +enum class RedactMode +{ + Disable, + Enable, + Marker, +}; + class Redact { public: - static void setRedactLog(bool v); + static void setRedactLog(RedactMode v); static std::string handleToDebugString(int64_t handle); static std::string keyToDebugString(const char * key, size_t size); @@ -37,10 +44,12 @@ class Redact friend class DB::FieldVisitorToDebugString; + static std::string toMarkerString(const std::string & raw, bool ignore_escape = false); + protected: Redact() = default; private: // Log user data to log only when this flag is set to false. 
- static std::atomic REDACT_LOG; + static std::atomic REDACT_LOG; }; diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 996c1b0a785..ce3849bf7db 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -212,6 +212,11 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva "The freshness of tiflash data with tikv data", \ Histogram, \ F(type_syncing_data_freshness, {{"type", "data_freshness"}}, ExpBuckets{0.001, 2, 20})) \ + M(tiflash_memory_usage_by_class, \ + "TiFlash memory consumes by class", \ + Gauge, \ + F(type_uni_page_ids, {"type", "uni_page_ids"}), \ + F(type_versioned_entries, {"type", "versioned_entries"})) \ M(tiflash_storage_read_tasks_count, "Total number of storage engine read tasks", Counter) \ M(tiflash_storage_command_count, \ "Total number of storage's command, such as delete range / shutdown /startup", \ @@ -428,6 +433,10 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_total, {{"type", "total"}}, ExpBucketsWithRange{0.2, 4, 300}), \ F(type_queue_stage, {{"type", "queue_stage"}}, ExpBucketsWithRange{0.2, 4, 300}), \ F(type_phase1_total, {{"type", "phase1_total"}}, ExpBucketsWithRange{0.2, 4, 300})) \ + M(tiflash_raft_command_throughput, \ + "", \ + Histogram, \ + F(type_prehandle_snapshot, {{"type", "prehandle_snapshot"}}, ExpBuckets{128, 2, 11})) \ M(tiflash_raft_command_duration_seconds, \ "Bucketed histogram of some raft command: apply snapshot and ingest SST", \ Histogram, /* these command usually cost several seconds, increase the start bucket to 50ms */ \ @@ -496,6 +505,7 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_flush_log_gap, {{"type", "flush_log_gap"}}), \ F(type_flush_size, {{"type", "flush_size"}}), \ F(type_flush_rowcount, {{"type", "flush_rowcount"}}), \ + F(type_prehandle, {{"type", "prehandle"}}), \ F(type_flush_eager_gc, {{"type", 
"flush_eager_gc"}})) \ M(tiflash_raft_raft_frequent_events_count, \ "Raft frequent event counter", \ diff --git a/dbms/src/Common/TiFlashSecurity.h b/dbms/src/Common/TiFlashSecurity.h index a02afc9d04e..d951302c76f 100644 --- a/dbms/src/Common/TiFlashSecurity.h +++ b/dbms/src/Common/TiFlashSecurity.h @@ -15,10 +15,12 @@ #pragma once #include #include +#include #include #include #include #include +#include #include #include #include @@ -66,7 +68,7 @@ class TiFlashSecurityConfig : public ConfigObject return has_tls_config; } - bool redactInfoLog() + RedactMode redactInfoLog() { std::unique_lock lock(mu); return redact_info_log; @@ -108,13 +110,13 @@ class TiFlashSecurityConfig : public ConfigObject if (config.has("security.cert_allowed_cn") && has_tls_config) { String verify_cns = config.getString("security.cert_allowed_cn"); - parseAllowedCN(verify_cns); + allowed_common_names = parseAllowedCN(verify_cns); } // Mostly options name are combined with "_", keep this style if (config.has("security.redact_info_log")) { - redact_info_log = config.getBool("security.redact_info_log"); + redact_info_log = parseRedactLog(config.getString("security.redact_info_log")); } return cert_file_updated; } @@ -132,12 +134,42 @@ class TiFlashSecurityConfig : public ConfigObject return false; } - void parseAllowedCN(String verify_cns) + static RedactMode parseRedactLog(const String & config_str) + { + if (Poco::icompare(config_str, "marker") == 0) + return RedactMode::Marker; + + int n; + if (Poco::NumberParser::tryParse(config_str, n)) + { + return ((n == 0) ? 
RedactMode::Disable : RedactMode::Enable); + } + else if ( + Poco::icompare(config_str, "true") == 0 // + || Poco::icompare(config_str, "yes") == 0 // + || Poco::icompare(config_str, "on") == 0) + { + return RedactMode::Enable; + } + else if ( + Poco::icompare(config_str, "false") == 0 // + || Poco::icompare(config_str, "no") == 0 // + || Poco::icompare(config_str, "off") == 0) + { + return RedactMode::Disable; + } + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "invalid redact_info_log value, value={}", config_str); + } + + static std::set parseAllowedCN(String verify_cns) { if (verify_cns.size() > 2 && verify_cns[0] == '[' && verify_cns[verify_cns.size() - 1] == ']') { verify_cns = verify_cns.substr(1, verify_cns.size() - 2); } + + std::set common_names; Poco::StringTokenizer string_tokens(verify_cns, ","); for (const auto & string_token : string_tokens) { @@ -146,10 +178,13 @@ class TiFlashSecurityConfig : public ConfigObject { cn = cn.substr(1, cn.size() - 2); } - allowed_common_names.insert(std::move(cn)); + common_names.insert(std::move(cn)); } + return common_names; } + // Return whether grpc_context satisfy the `allowed_common_name` in config + // Mainly used for handling grpc requests bool checkGrpcContext(const grpc::ServerContext * grpc_context) const { std::unique_lock lock(mu); @@ -167,6 +202,8 @@ class TiFlashSecurityConfig : public ConfigObject return false; } + // Return whether cert satisfy the `allowed_common_name` in config + // Mainly used for handling http requests bool checkCommonName(const Poco::Crypto::X509Certificate & cert) { std::unique_lock lock(mu); @@ -233,7 +270,6 @@ class TiFlashSecurityConfig : public ConfigObject String new_ca_path; String new_cert_path; String new_key_path; - bool updated = false; if (config.has("security.ca_path")) { new_ca_path = config.getString("security.ca_path"); @@ -249,8 +285,10 @@ class TiFlashSecurityConfig : public ConfigObject new_key_path = config.getString("security.key_path"); 
miss_key_path = false; } + if (miss_ca_path && miss_cert_path && miss_key_path) { + // all configs are not exist if (inited && has_tls_config) { LOG_WARNING(log, "Can't remove tls config online"); @@ -259,60 +297,58 @@ class TiFlashSecurityConfig : public ConfigObject { LOG_INFO(log, "No TLS config is set."); } + return false; } else if (miss_ca_path || miss_cert_path || miss_key_path) { + // any of these configs is not exist throw Exception( "ca_path, cert_path, key_path must be set at the same time.", ErrorCodes::INVALID_CONFIG_PARAMETER); } - else + + // all configs are exist + assert(!miss_ca_path && !miss_cert_path && !miss_key_path); + if (inited && !has_tls_config) { - if (inited && !has_tls_config) - { - LOG_WARNING(log, "Can't add TLS config online"); - return false; - } - else - { - has_tls_config = true; - if (new_ca_path != ca_path || new_cert_path != cert_path || new_key_path != key_path) - { - ca_path = new_ca_path; - cert_path = new_cert_path; - key_path = new_key_path; - cert_files.files.clear(); - cert_files.addIfExists(ca_path); - cert_files.addIfExists(cert_path); - cert_files.addIfExists(key_path); - updated = true; - ssl_cerd_options_cached = false; - LOG_INFO( - log, - "Ssl certificate config path is updated: ca path is {} cert path is {} key path is {}", - ca_path, - cert_path, - key_path); - } - else - { - // whether the cert file content is updated - updated = fileUpdated(); - // update cert files - if (updated) - { - FilesChangesTracker new_files; - for (const auto & file : cert_files.files) - { - new_files.addIfExists(file.path); - } - cert_files = std::move(new_files); - ssl_cerd_options_cached = false; - } - } - } + LOG_WARNING(log, "Can't add TLS config online"); + return false; } - return updated; + + has_tls_config = true; // update this->has_tls_config + if (new_ca_path != ca_path || new_cert_path != cert_path || new_key_path != key_path) + { + // any path is changed + ca_path = new_ca_path; + cert_path = new_cert_path; + key_path = 
new_key_path; + cert_files.files.clear(); + cert_files.addIfExists(ca_path); + cert_files.addIfExists(cert_path); + cert_files.addIfExists(key_path); + ssl_cerd_options_cached = false; + LOG_INFO( + log, + "Ssl certificate config path is updated: ca path is {} cert path is {} key path is {}", + ca_path, + cert_path, + key_path); + return true; + } + + // whether the cert file content is updated + if (!fileUpdated()) + return false; + + // update cert files + FilesChangesTracker new_files; + for (const auto & file : cert_files.files) + { + new_files.addIfExists(file.path); + } + cert_files = std::move(new_files); + ssl_cerd_options_cached = false; + return true; } private: @@ -322,7 +358,7 @@ class TiFlashSecurityConfig : public ConfigObject String key_path; FilesChangesTracker cert_files; - bool redact_info_log = false; + RedactMode redact_info_log = RedactMode::Disable; std::set allowed_common_names; bool has_tls_config = false; bool has_security = false; diff --git a/dbms/src/Common/tests/gtest_redact.cpp b/dbms/src/Common/tests/gtest_redact.cpp index c10ab09c02e..07ccc5c6d12 100644 --- a/dbms/src/Common/tests/gtest_redact.cpp +++ b/dbms/src/Common/tests/gtest_redact.cpp @@ -12,13 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include +#include -namespace DB -{ -namespace tests +#include + +namespace DB::tests { + TEST(RedactLogTest, Basic) { const char * test_key = "\x01\x0a\xff"; @@ -26,18 +29,80 @@ TEST(RedactLogTest, Basic) const /*DB::HandleID*/ Int64 test_handle = 10009; - Redact::setRedactLog(false); + Redact::setRedactLog(RedactMode::Disable); EXPECT_EQ(Redact::keyToDebugString(test_key, key_sz), "010AFF"); EXPECT_EQ(Redact::keyToHexString(test_key, key_sz), "010AFF"); EXPECT_EQ(Redact::handleToDebugString(test_handle), "10009"); + std::stringstream ss; + Redact::keyToDebugString(test_key, key_sz, ss); + EXPECT_EQ(ss.str(), "010AFF"); - Redact::setRedactLog(true); + Redact::setRedactLog(RedactMode::Marker); + EXPECT_EQ(Redact::keyToDebugString(test_key, key_sz), "‹010AFF›"); + EXPECT_EQ(Redact::keyToHexString(test_key, key_sz), "010AFF"); + EXPECT_EQ(Redact::handleToDebugString(test_handle), "‹10009›"); + ss.str(""); + Redact::keyToDebugString(test_key, key_sz, ss); + EXPECT_EQ(ss.str(), "‹010AFF›"); + + Redact::setRedactLog(RedactMode::Enable); EXPECT_EQ(Redact::keyToDebugString(test_key, key_sz), "?"); EXPECT_EQ(Redact::keyToHexString(test_key, key_sz), "010AFF"); // Unaffected by readact-log status EXPECT_EQ(Redact::handleToDebugString(test_handle), "?"); + ss.str(""); + Redact::keyToDebugString(test_key, key_sz, ss); + EXPECT_EQ(ss.str(), "?"); + + Redact::setRedactLog(RedactMode::Disable); // restore flags +} + +TEST(RedactLogTest, ToMarkerString) +{ + for (const auto & [input, expect] : // + std::vector>{ + {"", "‹›"}, + {"abcdefg", "‹abcdefg›"}, + {"中文", "‹中文›"}, + }) + { + EXPECT_EQ(Redact::toMarkerString(input, true), expect) << input; + } + + for (const auto & [input, expect] : // + std::vector>{ + {"plain text", "‹plain text›"}, + {"‹›", "‹‹‹›››"}, + {"abc‹›de‹github›fg", "‹abc‹‹››de‹‹github››fg›"}, + {"abc‹", "‹abc‹‹›"}, + {"abc›def", "‹abc››def›"}, + {"abc‹github", "‹abc‹‹github›"}, + {"测试‹中文", "‹测试‹‹中文›"}, + }) + { + 
EXPECT_EQ(Redact::toMarkerString(input, false), expect) << input; + } +} + +TEST(RedactLogTest, FieldVisitorToDebugStringTest) +{ + std::vector> fields{ + {Field(), "‹NULL›"}, + {Field(static_cast(-65536)), "‹-65536›"}, + {Field(static_cast(65536)), "‹65536›"}, + {Field(3.1415926), "‹3.1415926›"}, + {Field(String("plain text")), "‹'plain text'›"}, + {Field(String("中文‹测试")), "‹'中文‹‹测试'›"}, + {Field(String("abc‹›de‹github›fg")), "‹'abc‹‹››de‹‹github››fg'›"}, + }; + + Redact::setRedactLog(RedactMode::Marker); + for (const auto & [input, expect] : fields) + { + auto output = applyVisitor(FieldVisitorToDebugString(), input); + EXPECT_EQ(output, expect); + } - Redact::setRedactLog(false); // restore flags + Redact::setRedactLog(RedactMode::Disable); } -} // namespace tests -} // namespace DB +} // namespace DB::tests diff --git a/dbms/src/Common/tests/gtest_tiflash_security.cpp b/dbms/src/Common/tests/gtest_tiflash_security.cpp index 7319744ffe1..66489c137ae 100644 --- a/dbms/src/Common/tests/gtest_tiflash_security.cpp +++ b/dbms/src/Common/tests/gtest_tiflash_security.cpp @@ -20,41 +20,38 @@ #include -namespace DB +namespace DB::tests { -namespace tests -{ -class TiFlashSecurityTest : public ext::Singleton -{ -}; TEST(TiFlashSecurityTest, Config) { - TiFlashSecurityConfig tiflash_config; - const auto log = Logger::get(); - tiflash_config.setLog(log); - - tiflash_config.parseAllowedCN(String("[abc,efg]")); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("abc"), 1); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("efg"), 1); - - tiflash_config.allowedCommonNames().clear(); - - tiflash_config.parseAllowedCN(String(R"(["abc","efg"])")); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("abc"), 1); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("efg"), 1); + { + auto cns = TiFlashSecurityConfig::parseAllowedCN(String("[abc,efg]")); + ASSERT_EQ(cns.count("abc"), 1); + ASSERT_EQ(cns.count("efg"), 1); + } - 
tiflash_config.allowedCommonNames().clear(); + { + auto cns = TiFlashSecurityConfig::parseAllowedCN(String(R"(["abc","efg"])")); + ASSERT_EQ(cns.count("abc"), 1); + ASSERT_EQ(cns.count("efg"), 1); + } - tiflash_config.parseAllowedCN(String("[ abc , efg ]")); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("abc"), 1); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("efg"), 1); + { + auto cns = TiFlashSecurityConfig::parseAllowedCN(String("[ abc , efg ]")); + ASSERT_EQ(cns.count("abc"), 1); + ASSERT_EQ(cns.count("efg"), 1); + } - tiflash_config.allowedCommonNames().clear(); + { + auto cns = TiFlashSecurityConfig::parseAllowedCN(String(R"([ "abc", "efg" ])")); + ASSERT_EQ(cns.count("abc"), 1); + ASSERT_EQ(cns.count("efg"), 1); + } - tiflash_config.parseAllowedCN(String(R"([ "abc", "efg" ])")); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("abc"), 1); - ASSERT_EQ((int)tiflash_config.allowedCommonNames().count("efg"), 1); + TiFlashSecurityConfig tiflash_config; + const auto log = Logger::get(); + tiflash_config.setLog(log); String test = R"( @@ -79,6 +76,31 @@ cert_allowed_cn="tidb" ASSERT_EQ((int)new_tiflash_config.allowedCommonNames().count("tidb"), 0); } +TEST(TiFlashSecurityTest, RedactLogConfig) +{ + for (const auto & [input, expect] : std::vector>{ + {"marker", RedactMode::Marker}, + {"Marker", RedactMode::Marker}, + {"MARKER", RedactMode::Marker}, + {"true", RedactMode::Enable}, + {"True", RedactMode::Enable}, + {"TRUE", RedactMode::Enable}, + {"yes", RedactMode::Enable}, + {"on", RedactMode::Enable}, + {"1", RedactMode::Enable}, + {"2", RedactMode::Enable}, + {"false", RedactMode::Disable}, + {"False", RedactMode::Disable}, + {"FALSE", RedactMode::Disable}, + {"no", RedactMode::Disable}, + {"off", RedactMode::Disable}, + {"0", RedactMode::Disable}, + }) + { + EXPECT_EQ(TiFlashSecurityConfig::parseRedactLog(input), expect); + } +} + TEST(TiFlashSecurityTest, Update) { String test = @@ -271,5 +293,5 @@ TEST(TiFlashSecurityTest, 
readAndCacheSslCredentialOptions) Poco::File key_file(key_path); key_file.remove(false); } -} // namespace tests -} // namespace DB + +} // namespace DB::tests diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 539a9e8409e..939eb8fec6b 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -525,8 +525,6 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate current_bucket_num = min_bucket_num; - // std::cerr << "current_bucket_num = " << current_bucket_num << "\n"; - /// No more blocks with ordinary data. if (current_bucket_num == NUM_BUCKETS) continue; @@ -538,15 +536,11 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate { if (input.block.info.bucket_num == current_bucket_num) { - // std::cerr << "having block for current_bucket_num\n"; - blocks_to_merge->emplace_back(std::move(input.block)); input.block = Block(); } else if (!input.splitted_blocks.empty() && input.splitted_blocks[min_bucket_num]) { - // std::cerr << "having splitted data for bucket\n"; - blocks_to_merge->emplace_back(std::move(input.splitted_blocks[min_bucket_num])); input.splitted_blocks[min_bucket_num] = Block(); } @@ -557,7 +551,6 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate else { /// There are only non-partitioned (single-level) data. Just merge them. 
- // std::cerr << "don't have two level\n"; BlocksToMerge blocks_to_merge = std::make_unique(); diff --git a/dbms/src/Debug/MockStorage.cpp b/dbms/src/Debug/MockStorage.cpp index 70313f561f2..a00aedefcf4 100644 --- a/dbms/src/Debug/MockStorage.cpp +++ b/dbms/src/Debug/MockStorage.cpp @@ -208,7 +208,7 @@ BlockInputStreamPtr MockStorage::getStreamFromDeltaMerge( rf_max_wait_time_ms, context.getTimezoneInfo()); auto [before_where, filter_column_name, project_after_where] - = ::DB::buildPushDownFilter(filter_conditions->conditions, *analyzer); + = analyzer->buildPushDownFilter(filter_conditions->conditions); BlockInputStreams ins = storage->read( column_names, query_info, @@ -265,7 +265,7 @@ void MockStorage::buildExecFromDeltaMerge( rf_max_wait_time_ms, context.getTimezoneInfo()); // Not using `auto [before_where, filter_column_name, project_after_where]` just to make the compiler happy. - auto build_ret = ::DB::buildPushDownFilter(filter_conditions->conditions, *analyzer); + auto build_ret = analyzer->buildPushDownFilter(filter_conditions->conditions); storage->read( exec_context_, group_builder, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 0cc4d3be567..17e72774a17 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -967,6 +967,30 @@ String DAGExpressionAnalyzer::buildFilterColumn( return filter_column_name; } +std::tuple DAGExpressionAnalyzer::buildPushDownFilter( + const google::protobuf::RepeatedPtrField & conditions) +{ + assert(!conditions.empty()); + + ExpressionActionsChain chain; + initChain(chain); + String filter_column_name = appendWhere(chain, conditions); + ExpressionActionsPtr before_where = chain.getLastActions(); + chain.addStep(); + + // remove useless tmp column and keep the schema of local streams and remote streams the same. 
+ for (const auto & col : getCurrentInputColumns()) + { + chain.getLastStep().required_output.push_back(col.name); + } + ExpressionActionsPtr project_after_where = chain.getLastActions(); + chain.finalize(); + chain.clear(); + + RUNTIME_CHECK(!project_after_where->getActions().empty()); + return {before_where, filter_column_name, project_after_where}; +} + String DAGExpressionAnalyzer::appendWhere( ExpressionActionsChain & chain, const google::protobuf::RepeatedPtrField & conditions) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index 32accc9e2a5..cef56088324 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -146,6 +146,9 @@ class DAGExpressionAnalyzer : private boost::noncopyable const ExpressionActionsPtr & actions, const google::protobuf::RepeatedPtrField & conditions); + std::tuple buildPushDownFilter( + const google::protobuf::RepeatedPtrField & conditions); + void buildAggFuncs( const tipb::Aggregation & aggregation, const ExpressionActionsPtr & actions, diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 04a50caeb31..fd568759930 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -436,31 +436,6 @@ void executeCreatingSets(DAGPipeline & pipeline, const Context & context, size_t } } -std::tuple buildPushDownFilter( - const google::protobuf::RepeatedPtrField & conditions, - DAGExpressionAnalyzer & analyzer) -{ - assert(!conditions.empty()); - - ExpressionActionsChain chain; - analyzer.initChain(chain); - String filter_column_name = analyzer.appendWhere(chain, conditions); - ExpressionActionsPtr before_where = chain.getLastActions(); - chain.addStep(); - - // remove useless tmp column and keep the schema of local streams and remote streams the same. 
- for (const auto & col : analyzer.getCurrentInputColumns()) - { - chain.getLastStep().required_output.push_back(col.name); - } - ExpressionActionsPtr project_after_where = chain.getLastActions(); - chain.finalize(); - chain.clear(); - - RUNTIME_CHECK(!project_after_where->getActions().empty()); - return {before_where, filter_column_name, project_after_where}; -} - void executePushedDownFilter( const FilterConditions & filter_conditions, DAGExpressionAnalyzer & analyzer, @@ -468,7 +443,7 @@ void executePushedDownFilter( DAGPipeline & pipeline) { auto [before_where, filter_column_name, project_after_where] - = ::DB::buildPushDownFilter(filter_conditions.conditions, analyzer); + = analyzer.buildPushDownFilter(filter_conditions.conditions); for (auto & stream : pipeline.streams) { @@ -489,7 +464,7 @@ void executePushedDownFilter( LoggerPtr log) { auto [before_where, filter_column_name, project_after_where] - = ::DB::buildPushDownFilter(filter_conditions.conditions, analyzer); + = analyzer.buildPushDownFilter(filter_conditions.conditions); auto input_header = group_builder.getCurrentHeader(); for (size_t i = 0; i < group_builder.concurrency(); ++i) diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.h b/dbms/src/Flash/Coprocessor/InterpreterUtils.h index 58f0c0c7050..785d86bf8eb 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.h +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.h @@ -96,10 +96,6 @@ void executeFinalSort( void executeCreatingSets(DAGPipeline & pipeline, const Context & context, size_t max_streams, const LoggerPtr & log); -std::tuple buildPushDownFilter( - const google::protobuf::RepeatedPtrField & conditions, - DAGExpressionAnalyzer & analyzer); - void executePushedDownFilter( const FilterConditions & filter_conditions, DAGExpressionAnalyzer & analyzer, diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp index 9448bd8477b..0aa6d59dd93 
100644 --- a/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp +++ b/dbms/src/Flash/Coprocessor/tests/gtest_ti_remote_block_inputstream.cpp @@ -78,7 +78,7 @@ struct MockWriter summary.scan_context->dmfile_mvcc_skipped_rows = 15000; summary.scan_context->dmfile_lm_filter_scanned_rows = 8000; summary.scan_context->dmfile_lm_filter_skipped_rows = 15000; - summary.scan_context->total_dmfile_rough_set_index_check_time_ns = 10; + summary.scan_context->total_rs_pack_filter_check_time_ns = 10; summary.scan_context->total_dmfile_read_time_ns = 200; summary.scan_context->create_snapshot_time_ns = 5; summary.scan_context->total_local_region_num = 10; diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index a7cc99d1699..f5b35f802cd 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -5212,6 +5212,7 @@ class FunctionSubStringIndex : public IFunction const UInt8 * pos = begin; const UInt8 * end = pos + data_size; assert(delim_size != 0); + assert(count != 0); if (count > 0) { // Fast exit when count * delim_size > data_size @@ -5227,10 +5228,11 @@ class FunctionSubStringIndex : public IFunction if (match == end || count == 0) { copyDataToResult(res_data, res_offset, begin, match); - break; + return; } pos = match + delim_size; } + copyDataToResult(res_data, res_offset, begin, end); } else { diff --git a/dbms/src/Functions/tests/gtest_substring_index.cpp b/dbms/src/Functions/tests/gtest_substring_index.cpp index 0bd8383e2d1..39dec7a38c2 100644 --- a/dbms/src/Functions/tests/gtest_substring_index.cpp +++ b/dbms/src/Functions/tests/gtest_substring_index.cpp @@ -364,6 +364,15 @@ try createColumn>({"www.pingcap.com", "www...www", "中文.测.试。。。", "www.www"}), createColumn>({"", "", "", ""}), createColumn>({2, 2, 2, 2}))); + + // Test issue 9116 + ASSERT_COLUMN_EQ( + createColumn>({"aaabbba", "aaabbbaa", "aaabbbaaa", "aaabbbaaa", "aaabbbaaa"}), + executeFunction( + func_name, + 
createColumn>({"aaabbbaaa", "aaabbbaaa", "aaabbbaaa", "aaabbbaaa", "aaabbbaaa"}), + createColumn>({"a", "a", "a", "a", "a"}), + createColumn>({5, 6, 7, 8, 9}))); } CATCH diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 90d20a1f381..b0fd613b170 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -300,6 +301,7 @@ void AsynchronousMetrics::update() set("LogNums", usage.total_log_file_num); set("LogDiskBytes", usage.total_log_disk_size); set("PagesInMem", usage.num_pages); + set("VersionedEntries", DB::PS::PageStorageMemorySummary::versioned_entry_or_delete_count.load()); } if (context.getSharedContextDisagg()->isDisaggregatedStorageMode()) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 5831f99646c..4865b7ee007 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -14,16 +14,25 @@ #include #include +#include #include #include +#include #include #include #include #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#include +#pragma GCC diagnostic pop + #include #include +#include namespace DB { @@ -419,63 +428,64 @@ void ExpressionAction::execute(Block & block) const String ExpressionAction::toString() const { - std::stringstream ss; + FmtBuffer fmt_buf; switch (type) { case ADD_COLUMN: - ss << "ADD " << result_name << " " << (result_type ? result_type->getName() : "(no type)") << " " - << (added_column ? added_column->getName() : "(no column)"); + fmt_buf.fmtAppend( + "ADD {} {} {}", + result_name, + (result_type ? result_type->getName() : "(no type)"), + (added_column ? 
added_column->getName() : "(no column)")); break; case REMOVE_COLUMN: - ss << "REMOVE " << source_name; + fmt_buf.fmtAppend("REMOVE {}", source_name); break; case COPY_COLUMN: - ss << "COPY " << result_name << " = " << source_name; + fmt_buf.fmtAppend("COPY {} = {}", result_name, source_name); break; case APPLY_FUNCTION: - ss << "FUNCTION " << result_name << " " << (result_type ? result_type->getName() : "(no type)") << " = " - << (function ? function->getName() : "(no function)") << "("; - for (size_t i = 0; i < argument_names.size(); ++i) - { - if (i) - ss << ", "; - ss << argument_names[i]; - } - ss << ")"; + fmt_buf.fmtAppend( + "FUNCTION {} {} = {} (", + result_name, + (result_type ? result_type->getName() : "(no type)"), + (function ? function->getName() : "(no function)")); + fmt_buf.joinStr(argument_names.begin(), argument_names.end(), ", "); + fmt_buf.append(")"); break; case JOIN: - ss << "JOIN "; - for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it) - { - if (it != columns_added_by_join.begin()) - ss << ", "; - ss << it->name; - } + fmt_buf.append("JOIN "); + fmt_buf.joinStr( + columns_added_by_join.begin(), + columns_added_by_join.end(), + [](const NamesAndTypesList::value_type & col, FmtBuffer & buf) { buf.append(col.name); }, + ", "); break; case PROJECT: - ss << "PROJECT "; - for (size_t i = 0; i < projections.size(); ++i) - { - if (i) - ss << ", "; - ss << projections[i].first; - if (!projections[i].second.empty() && projections[i].second != projections[i].first) - ss << " AS " << projections[i].second; - } + fmt_buf.append("PROJECT "); + fmt_buf.joinStr( + projections.begin(), + projections.end(), + [](const NamesWithAliases::value_type & proj, FmtBuffer & buf) { + buf.append(proj.first); + if (!proj.second.empty() && proj.second != proj.first) + buf.fmtAppend(" AS {}", proj.second); + }, + ", "); break; + case CONVERT_TO_NULLABLE: - ss << "CONVERT_TO_NULLABLE("; - ss << col_need_to_nullable << ")"; + 
fmt_buf.fmtAppend("CONVERT_TO_NULLABLE({})", col_need_to_nullable); break; default: - throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected Action type, type={}", static_cast(type)); } - return ss.str(); + return fmt_buf.toString(); } void ExpressionActions::addInput(const ColumnWithTypeAndName & column) @@ -665,11 +675,6 @@ void ExpressionActions::finalize(const Names & output_columns, bool keep_used_in } } - /* std::cerr << "\n"; - for (const auto & action : actions) - std::cerr << action.toString() << "\n"; - std::cerr << "\n";*/ - /// Deletes unnecessary temporary columns. /// If the column after performing the function `refcount = 0`, it can be deleted. @@ -750,6 +755,30 @@ std::string ExpressionActions::dumpActions() const return ss.str(); } +std::string ExpressionActions::dumpJSONActions() const +{ + Poco::JSON::Object::Ptr json = new Poco::JSON::Object(); + + Poco::JSON::Array::Ptr inputs = new Poco::JSON::Array(); + for (const auto & input_col : input_columns) + inputs->add(fmt::format("{} {}", input_col.name, input_col.type->getName())); + json->set("input", inputs); + + Poco::JSON::Array::Ptr acts = new Poco::JSON::Array(); + for (const auto & action : actions) + acts->add(action.toString()); + json->set("actions", acts); + + Poco::JSON::Array::Ptr outputs = new Poco::JSON::Array(); + for (const auto & output_col : sample_block.getNamesAndTypesList()) + outputs->add(fmt::format("{} {}", output_col.name, output_col.type->getName())); + json->set("output", outputs); + + std::stringstream buf; + json->stringify(buf); + return buf.str(); +} + void ExpressionActionsChain::addStep() { if (steps.empty()) @@ -788,18 +817,14 @@ void ExpressionActionsChain::finalize() std::string ExpressionActionsChain::dumpChain() { - std::stringstream ss; - + FmtBuffer fmt_buf; for (size_t i = 0; i < steps.size(); ++i) { - ss << "step " << i << "\n"; - ss << "required output:\n"; - for (const std::string & 
name : steps[i].required_output) - ss << name << "\n"; - ss << "\n" << steps[i].actions->dumpActions() << "\n"; + fmt_buf.fmtAppend("step {}\nrequired output:\n", i); + fmt_buf.joinStr(steps[i].required_output.begin(), steps[i].required_output.end(), "\n"); + fmt_buf.fmtAppend("\n{}\n", steps[i].actions->dumpActions()); } - - return ss.str(); + return fmt_buf.toString(); } template std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns); diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 552aec13041..92bbfe02d71 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -207,6 +207,7 @@ class ExpressionActions const Block & getSampleBlock() const { return sample_block; } std::string dumpActions() const; + std::string dumpJSONActions() const; template static std::string getSmallestColumn(const NameAndTypeContainer & columns); diff --git a/dbms/src/Parsers/CMakeLists.txt b/dbms/src/Parsers/CMakeLists.txt index 8511e6811fe..a7243ab8753 100644 --- a/dbms/src/Parsers/CMakeLists.txt +++ b/dbms/src/Parsers/CMakeLists.txt @@ -17,7 +17,3 @@ add_headers_and_sources(tiflash_parsers .) add_library(tiflash_parsers ${SPLIT_SHARED} ${tiflash_parsers_headers} ${tiflash_parsers_sources}) target_link_libraries (tiflash_parsers tiflash_common_io) target_include_directories (tiflash_parsers PUBLIC ${DBMS_INCLUDE_DIR}) - -if (ENABLE_TESTS) - add_subdirectory (tests EXCLUDE_FROM_ALL) -endif () diff --git a/dbms/src/Parsers/tests/CMakeLists.txt b/dbms/src/Parsers/tests/CMakeLists.txt deleted file mode 100644 index a8654d449da..00000000000 --- a/dbms/src/Parsers/tests/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2023 PingCAP, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -include_directories (${CMAKE_CURRENT_BINARY_DIR}) - -set(SRCS ) - -add_executable (lexer lexer.cpp ${SRCS}) -target_link_libraries (lexer tiflash_parsers) - -add_executable (select_parser select_parser.cpp ${SRCS}) -target_link_libraries (select_parser tiflash_parsers) - -add_executable (create_parser create_parser.cpp ${SRCS}) -target_link_libraries (create_parser tiflash_parsers) - diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp deleted file mode 100644 index 411de72a3f4..00000000000 --- a/dbms/src/Parsers/tests/lexer.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include -#include -#include -#include -#include - -#include - - -/// How to test: -/// for i in ~/work/ClickHouse/dbms/tests/queries/0_stateless/*.sql; do echo $i; grep -q 'FORMAT' $i || ./lexer < $i || break; done -/// - - -using namespace DB; - -std::map hilite = { - {TokenType::Whitespace, "\033[0;44m"}, - {TokenType::Comment, "\033[1;46m"}, - {TokenType::BareWord, "\033[1m"}, - {TokenType::Number, "\033[1;36m"}, - {TokenType::StringLiteral, "\033[1;32m"}, - {TokenType::QuotedIdentifier, "\033[1;35m"}, - - {TokenType::OpeningRoundBracket, "\033[1;33m"}, - {TokenType::ClosingRoundBracket, "\033[1;33m"}, - {TokenType::OpeningSquareBracket, "\033[1;33m"}, - {TokenType::ClosingSquareBracket, "\033[1;33m"}, - - {TokenType::Comma, "\033[1;33m"}, - {TokenType::Semicolon, "\033[1;33m"}, - {TokenType::Dot, "\033[1;33m"}, - {TokenType::Asterisk, "\033[1;33m"}, - {TokenType::Plus, "\033[1;33m"}, - {TokenType::Minus, "\033[1;33m"}, - {TokenType::Slash, "\033[1;33m"}, - {TokenType::Percent, "\033[1;33m"}, - {TokenType::Arrow, "\033[1;33m"}, - {TokenType::QuestionMark, "\033[1;33m"}, - {TokenType::Colon, "\033[1;33m"}, - {TokenType::Equals, "\033[1;33m"}, - {TokenType::NotEquals, "\033[1;33m"}, - {TokenType::Less, "\033[1;33m"}, - {TokenType::Greater, "\033[1;33m"}, - {TokenType::LessOrEquals, "\033[1;33m"}, - {TokenType::GreaterOrEquals, "\033[1;33m"}, - {TokenType::Concatenation, "\033[1;33m"}, - - {TokenType::EndOfStream, ""}, - - {TokenType::Error, "\033[0;41m"}, - {TokenType::ErrorMultilineCommentIsNotClosed, "\033[0;41m"}, - {TokenType::ErrorSingleQuoteIsNotClosed, "\033[0;41m"}, - {TokenType::ErrorDoubleQuoteIsNotClosed, "\033[0;41m"}, - {TokenType::ErrorBackQuoteIsNotClosed, "\033[0;41m"}, - {TokenType::ErrorSingleExclamationMark, "\033[0;41m"}, - {TokenType::ErrorWrongNumber, "\033[0;41m"}, - {TokenType::ErrorMaxQuerySizeExceeded, "\033[0;41m"}, -}; - - -int main(int, char **) -{ - String query; - ReadBufferFromFileDescriptor in(STDIN_FILENO); 
- WriteBufferFromFileDescriptor out(STDOUT_FILENO); - readStringUntilEOF(query, in); - - Lexer lexer(query.data(), query.data() + query.size()); - - while (true) - { - Token token = lexer.nextToken(); - - if (token.isEnd()) - break; - - writeChar(' ', out); - - auto it = hilite.find(token.type); - if (it != hilite.end()) - writeCString(it->second, out); - - writeString(token.begin, token.size(), out); - - if (it != hilite.end()) - writeCString("\033[0m", out); - - writeChar(' ', out); - - if (token.isError()) - return 1; - } - - writeChar('\n', out); - /* - Tokens tokens(query.data(), query.data() + query.size()); - TokenIterator token(tokens); - - while (token->type.isEnd()) - { - auto it = hilite.find(token->type); - if (it != hilite.end()) - writeCString(it->second, out); - - writeString(token->begin, token->size(), out); - - if (it != hilite.end()) - writeCString("\033[0m", out); - - writeChar('\n', out); - ++token; - }*/ - - return 0; -} diff --git a/dbms/src/Parsers/tests/select_parser.cpp b/dbms/src/Parsers/tests/select_parser.cpp deleted file mode 100644 index 0fd6b6650b9..00000000000 --- a/dbms/src/Parsers/tests/select_parser.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2023 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -#include - - -int main(int, char **) -try -{ - using namespace DB; - - std::string input = " SELECT 18446744073709551615, f(1), '\\\\', [a, b, c], (a, b, c), 1 + 2 * -3, a = b OR c > " - "d.1 + 2 * -g[0] AND NOT e < f * (x + y)" - " FROM default.hits" - " WHERE CounterID = 101500 AND UniqID % 3 = 0" - " GROUP BY UniqID" - " HAVING SUM(Refresh) > 100" - " ORDER BY Visits, PageViews" - " LIMIT 1000, 10" - " INTO OUTFILE 'test.out'" - " FORMAT TabSeparated"; - - ParserQueryWithOutput parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); - - std::cout << "Success." << std::endl; - formatAST(*ast, std::cerr); - std::cout << std::endl; - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; - return 1; -} diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 3b230174eb2..cfee9ae5089 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -567,8 +567,6 @@ bool TCPHandler::receivePacket() UInt64 packet_type = 0; readVarUInt(packet_type, *in); - // std::cerr << "Packet: " << packet_type << std::endl; - switch (packet_type) { case Protocol::Client::Query: diff --git a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp index 381df2dbd8e..6ee1ebdbae1 100644 --- a/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp +++ b/dbms/src/Storages/DeltaMerge/File/ColumnStream.cpp @@ -157,10 +157,10 @@ std::unique_ptr ColumnReadStream::buildColDataRe // Try to get the largest buffer size of reading continuous packs size_t buffer_size = 0; - const auto & use_packs = reader.pack_filter.getUsePacksConst(); + const auto & pack_res = reader.pack_filter.getPackResConst(); for (size_t i = 0; i < n_packs; /*empty*/) { - if (!use_packs[i]) + if (!isUse(pack_res[i])) { ++i; continue; @@ -168,7 +168,7 @@ std::unique_ptr ColumnReadStream::buildColDataRe size_t cur_offset_in_file = 
getOffsetInFile(i); size_t end = i + 1; // First, find the end of current available range. - while (end < n_packs && use_packs[end]) + while (end < n_packs && isUse(pack_res[end])) ++end; // Second, if the end of range is inside the block, we will need to read it too. diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp index 849b3ccff25..f8ac8eace57 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.cpp @@ -21,6 +21,8 @@ namespace DB::DM void DMFilePackFilter::init() { + Stopwatch watch; + SCOPE_EXIT({ scan_context->total_rs_pack_filter_check_time_ns += watch.elapsed(); }); size_t pack_count = dmfile->getPacks(); auto read_all_packs = (rowkey_ranges.size() == 1 && rowkey_ranges[0].all()) || rowkey_ranges.empty(); if (!read_all_packs) @@ -47,33 +49,21 @@ void DMFilePackFilter::init() ProfileEvents::increment(ProfileEvents::DMFileFilterNoFilter, pack_count); - size_t after_pk = 0; - size_t after_read_packs = 0; - size_t after_filter = 0; - /// Check packs by handle_res - for (size_t i = 0; i < pack_count; ++i) - { - use_packs[i] = handle_res[i] != None; - } - - for (auto u : use_packs) - after_pk += u; + pack_res = handle_res; + auto after_pk = countUsePack(); /// Check packs by read_packs if (read_packs) { for (size_t i = 0; i < pack_count; ++i) { - use_packs[i] = (static_cast(use_packs[i])) && read_packs->contains(i); + pack_res[i] = read_packs->contains(i) ? 
pack_res[i] : RSResult::None; } } - - for (auto u : use_packs) - after_read_packs += u; + auto after_read_packs = countUsePack(); ProfileEvents::increment(ProfileEvents::DMFileFilterAftPKAndPackSet, after_read_packs); - /// Check packs by filter in where clause if (filter) { @@ -84,20 +74,20 @@ void DMFilePackFilter::init() tryLoadIndex(id); } - Stopwatch watch; const auto check_results = filter->roughCheck(0, pack_count, param); std::transform( - use_packs.begin(), - use_packs.end(), - check_results.begin(), - use_packs.begin(), - [](UInt8 a, RSResult b) { return (static_cast(a)) && (b != None); }); - scan_context->total_dmfile_rough_set_index_check_time_ns += watch.elapsed(); + pack_res.cbegin(), + pack_res.cend(), + check_results.cbegin(), + pack_res.begin(), + [](RSResult a, RSResult b) { return a && b; }); } - - for (auto u : use_packs) - after_filter += u; + auto [none_count, some_count, all_count] = countPackRes(); + auto after_filter = some_count + all_count; ProfileEvents::increment(ProfileEvents::DMFileFilterAftRoughSet, after_filter); + scan_context->rs_pack_filter_none += none_count; + scan_context->rs_pack_filter_some += some_count; + scan_context->rs_pack_filter_all += all_count; Float64 filter_rate = 0.0; if (after_read_packs != 0) @@ -108,14 +98,47 @@ void DMFilePackFilter::init() LOG_DEBUG( log, "RSFilter exclude rate: {:.2f}, after_pk: {}, after_read_packs: {}, after_filter: {}, handle_ranges: {}" - ", read_packs: {}, pack_count: {}", + ", read_packs: {}, pack_count: {}, none_count: {}, some_count: {}, all_count: {}", ((after_read_packs == 0) ? std::numeric_limits::quiet_NaN() : filter_rate), after_pk, after_read_packs, after_filter, toDebugString(rowkey_ranges), ((read_packs == nullptr) ? 
0 : read_packs->size()), - pack_count); + pack_count, + none_count, + some_count, + all_count); +} + +std::tuple DMFilePackFilter::countPackRes() const +{ + UInt64 none_count = 0; + UInt64 some_count = 0; + UInt64 all_count = 0; + for (auto res : pack_res) + { + switch (res) + { + case RSResult::None: + ++none_count; + break; + case RSResult::Some: + ++some_count; + break; + case RSResult::All: + ++all_count; + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} is invalid", static_cast(res)); + } + } + return {none_count, some_count, all_count}; +} + +UInt64 DMFilePackFilter::countUsePack() const +{ + return std::count_if(pack_res.cbegin(), pack_res.cend(), [](RSResult res) { return isUse(res); }); } void DMFilePackFilter::loadIndex( @@ -224,7 +247,7 @@ void DMFilePackFilter::loadIndex( indexes.emplace(col_id, RSIndex(type, minmax_index)); } -void DMFilePackFilter::tryLoadIndex(const ColId col_id) +void DMFilePackFilter::tryLoadIndex(ColId col_id) { if (param.indexes.count(col_id)) return; @@ -234,8 +257,6 @@ void DMFilePackFilter::tryLoadIndex(const ColId col_id) Stopwatch watch; loadIndex(param.indexes, dmfile, file_provider, index_cache, set_cache_if_miss, col_id, read_limiter, scan_context); - - scan_context->total_dmfile_rough_set_index_check_time_ns += watch.elapsed(); } } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h index 2a227a0dc39..16c55b96f4b 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h +++ b/dbms/src/Storages/DeltaMerge/File/DMFilePackFilter.h @@ -68,9 +68,10 @@ class DMFilePackFilter return pack_filter; } - inline const std::vector & getHandleRes() const { return handle_res; } - inline const std::vector & getUsePacksConst() const { return use_packs; } - inline std::vector & getUsePacks() { return use_packs; } + const RSResults & getHandleRes() const { return handle_res; } + const RSResults & getPackResConst() const { 
return pack_res; } + RSResults & getPackRes() { return pack_res; } + UInt64 countUsePack() const; Handle getMinHandle(size_t pack_id) { @@ -104,7 +105,7 @@ class DMFilePackFilter const auto & pack_stats = dmfile->getPackStats(); for (size_t i = 0; i < pack_stats.size(); ++i) { - if (use_packs[i]) + if (isUse(pack_res[i])) { rows += pack_stats[i].rows; bytes += pack_stats[i].bytes; @@ -133,7 +134,6 @@ class DMFilePackFilter , read_packs(read_packs_) , file_provider(file_provider_) , handle_res(dmfile->getPacks(), RSResult::All) - , use_packs(dmfile->getPacks()) , scan_context(scan_context_) , log(Logger::get(tracing_id)) , read_limiter(read_limiter_) @@ -151,7 +151,10 @@ class DMFilePackFilter const ReadLimiterPtr & read_limiter, const ScanContextPtr & scan_context); - void tryLoadIndex(const ColId col_id); + void tryLoadIndex(ColId col_id); + + // None, Some, All + std::tuple countPackRes() const; private: DMFilePtr dmfile; @@ -164,8 +167,10 @@ class DMFilePackFilter RSCheckParam param; + // `handle_res` is the filter results of `rowkey_ranges`. std::vector handle_res; - std::vector use_packs; + // `pack_res` is the filter results of `rowkey_ranges && filter && read_packs`. 
+ std::vector pack_res; const ScanContextPtr scan_context; diff --git a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp index 33452997292..027f790d227 100644 --- a/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp +++ b/dbms/src/Storages/DeltaMerge/File/DMFileReader.cpp @@ -108,16 +108,16 @@ DMFileReader::DMFileReader( bool DMFileReader::getSkippedRows(size_t & skip_rows) { skip_rows = 0; - const auto & use_packs = pack_filter.getUsePacksConst(); + const auto & pack_res = pack_filter.getPackResConst(); const auto & pack_stats = dmfile->getPackStats(); - for (; next_pack_id < use_packs.size() && !use_packs[next_pack_id]; ++next_pack_id) + for (; next_pack_id < pack_res.size() && !isUse(pack_res[next_pack_id]); ++next_pack_id) { skip_rows += pack_stats[next_pack_id].rows; addSkippedRows(pack_stats[next_pack_id].rows); } next_row_offset += skip_rows; // return false if it is the end of stream. - return next_pack_id < use_packs.size(); + return next_pack_id < pack_res.size(); } // Skip the block which should be returned by next read() @@ -144,14 +144,14 @@ size_t DMFileReader::skipNextBlock() // Move forward next_pack_id and next_row_offset size_t DMFileReader::getReadRows() { - const auto & use_packs = pack_filter.getUsePacksConst(); + const auto & pack_res = pack_filter.getPackResConst(); const size_t start_pack_id = next_pack_id; // When read_one_pack_every_time is true, we can just read one pack every time. // std::numeric_limits::max() means no limit const size_t read_pack_limit = read_one_pack_every_time ? 
1 : std::numeric_limits::max(); const auto & pack_stats = dmfile->getPackStats(); size_t read_rows = 0; - for (; next_pack_id < use_packs.size() && use_packs[next_pack_id] && read_rows < rows_threshold_per_read; + for (; next_pack_id < pack_res.size() && isUse(pack_res[next_pack_id]) && read_rows < rows_threshold_per_read; ++next_pack_id) { if (next_pack_id - start_pack_id >= read_pack_limit) @@ -172,10 +172,10 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) return {}; } - /// 2. Mark use_packs[i] = false if all rows in the i-th pack are filtered out by filter. + /// 2. Mark pack_res[i] = None if all rows in the i-th pack are filtered out by filter. const auto & pack_stats = dmfile->getPackStats(); - auto & use_packs = pack_filter.getUsePacks(); + auto & pack_res = pack_filter.getPackRes(); size_t start_row_offset = next_row_offset; size_t start_pack_id = next_pack_id; @@ -187,18 +187,18 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) for (size_t i = start_pack_id; i < last_pack_id; ++i) { if (countBytesInFilter(filter, offset, pack_stats[i].rows) == 0) - use_packs[i] = false; + pack_res[i] = RSResult::None; offset += pack_stats[i].rows; } } - /// 3. Mark the use_packs[last_pack_id] as false temporarily to avoid reading it and its following packs in this round + /// 3. Mark the pack_res[last_pack_id] as None temporarily to avoid reading it and its following packs in this round - bool next_pack_id_use_packs_cp = false; - if (last_pack_id < use_packs.size()) + auto next_pack_id_pack_res_cp = RSResult::None; + if (last_pack_id < pack_res.size()) { - next_pack_id_use_packs_cp = use_packs[last_pack_id]; - use_packs[last_pack_id] = false; + next_pack_id_pack_res_cp = pack_res[last_pack_id]; + pack_res[last_pack_id] = RSResult::None; } /// 4. 
Read and filter packs @@ -221,11 +221,11 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) { // When the next pack is not used or the pack is the last pack, call read() to read theses packs and filter them // For example: - // When next_pack_id_cp = use_packs.size() and use_packs[next_pack_id:next_pack_id_cp] = [true, true, false, true, true, true] + // When next_pack_id_cp = pack_res.size() and pack_res[next_pack_id:next_pack_id_cp] = [true, true, false, true, true, true] // The algorithm runs as follows: // When i = next_pack_id + 2, call read() to read {next_pack_id, next_pack_id + 1}th packs // When i = next_pack_id + 5, call read() to read {next_pack_id + 3, next_pack_id + 4, next_pack_id + 5}th packs - if (use_packs[pack_id] && (pack_id + 1 == use_packs.size() || !use_packs[pack_id + 1])) + if (isUse(pack_res[pack_id]) && (pack_id + 1 == pack_res.size() || !isUse(pack_res[pack_id + 1]))) { Block block = read(); size_t rows = block.rows(); @@ -256,16 +256,16 @@ Block DMFileReader::readWithFilter(const IColumn::Filter & filter) } offset += rows; } - else if (!use_packs[pack_id]) + else if (!isUse(pack_res[pack_id])) { offset += pack_stats[pack_id].rows; } } - /// 5. Restore the use_packs[last_pack_id] + /// 5. Restore the pack_res[last_pack_id] - if (last_pack_id < use_packs.size()) - use_packs[last_pack_id] = next_pack_id_use_packs_cp; + if (last_pack_id < pack_res.size()) + pack_res[last_pack_id] = next_pack_id_pack_res_cp; Block res = getHeader().cloneWithColumns(std::move(columns)); res.setStartOffset(start_row_offset); diff --git a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp b/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp new file mode 100644 index 00000000000..4f0c6ceb787 --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.cpp @@ -0,0 +1,197 @@ +// Copyright 2024 PingCAP, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::DM +{ +PushDownFilterPtr PushDownFilter::build( + const RSOperatorPtr & rs_operator, + const ColumnInfos & table_scan_column_info, + const google::protobuf::RepeatedPtrField & pushed_down_filters, + const ColumnDefines & columns_to_read, + const Context & context, + const LoggerPtr & tracing_logger) +{ + if (pushed_down_filters.empty()) + { + LOG_DEBUG(tracing_logger, "Push down filter is empty"); + return std::make_shared(rs_operator); + } + std::unordered_map columns_to_read_map; + for (const auto & column : columns_to_read) + columns_to_read_map.emplace(column.id, column); + + // Get the columns of the filter, is a subset of columns_to_read + std::unordered_set filter_col_id_set; + for (const auto & expr : pushed_down_filters) + { + getColumnIDsFromExpr(expr, table_scan_column_info, filter_col_id_set); + } + auto filter_columns = std::make_shared(); + filter_columns->reserve(filter_col_id_set.size()); + for (const auto & cid : filter_col_id_set) + { + RUNTIME_CHECK_MSG( + columns_to_read_map.contains(cid), + "Filter ColumnID({}) not found in columns_to_read_map", + cid); + filter_columns->emplace_back(columns_to_read_map.at(cid)); + } + + // The source_columns_of_analyzer should be the same as the size of table_scan_column_info + // The columns_to_read is a subset of table_scan_column_info, when 
there are generated columns and extra table id column. + NamesAndTypes source_columns_of_analyzer; + source_columns_of_analyzer.reserve(table_scan_column_info.size()); + for (size_t i = 0; i < table_scan_column_info.size(); ++i) + { + auto const & ci = table_scan_column_info[i]; + const auto cid = ci.id; + if (ci.hasGeneratedColumnFlag()) + { + const auto & col_name = GeneratedColumnPlaceholderBlockInputStream::getColumnName(i); + const auto & data_type = getDataTypeByColumnInfoForComputingLayer(ci); + source_columns_of_analyzer.emplace_back(col_name, data_type); + continue; + } + if (cid == EXTRA_TABLE_ID_COLUMN_ID) + { + source_columns_of_analyzer.emplace_back(EXTRA_TABLE_ID_COLUMN_NAME, EXTRA_TABLE_ID_COLUMN_TYPE); + continue; + } + RUNTIME_CHECK_MSG(columns_to_read_map.contains(cid), "ColumnID({}) not found in columns_to_read_map", cid); + source_columns_of_analyzer.emplace_back(columns_to_read_map.at(cid).name, columns_to_read_map.at(cid).type); + } + auto analyzer = std::make_unique(source_columns_of_analyzer, context); + + // Build the extra cast + ExpressionActionsPtr extra_cast = nullptr; + // need_cast_column should be the same size as table_scan_column_info and source_columns_of_analyzer + std::vector may_need_add_cast_column; + may_need_add_cast_column.reserve(table_scan_column_info.size()); + for (const auto & col : table_scan_column_info) + may_need_add_cast_column.push_back( + !col.hasGeneratedColumnFlag() && filter_col_id_set.contains(col.id) && col.id != -1); + ExpressionActionsChain chain; + auto & step = analyzer->initAndGetLastStep(chain); + auto & actions = step.actions; + if (auto [has_cast, casted_columns] + = analyzer->buildExtraCastsAfterTS(actions, may_need_add_cast_column, table_scan_column_info); + has_cast) + { + NamesWithAliases project_cols; + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (filter_col_id_set.contains(columns_to_read[i].id)) + project_cols.emplace_back(casted_columns[i], columns_to_read[i].name); + } + 
actions->add(ExpressionAction::project(project_cols)); + + for (const auto & col : *filter_columns) + step.required_output.push_back(col.name); + + extra_cast = chain.getLastActions(); + chain.finalize(); + chain.clear(); + LOG_DEBUG(tracing_logger, "Extra cast for filter columns: {}", extra_cast->dumpActions()); + } + + // build filter expression actions + auto [before_where, filter_column_name, project_after_where] = analyzer->buildPushDownFilter(pushed_down_filters); + LOG_DEBUG(tracing_logger, "Push down filter: {}", before_where->dumpActions()); + + // record current column defines + auto columns_after_cast = std::make_shared(); + if (extra_cast != nullptr) + { + columns_after_cast->reserve(columns_to_read.size()); + const auto & current_names_and_types = analyzer->getCurrentInputColumns(); + for (size_t i = 0; i < table_scan_column_info.size(); ++i) + { + if (table_scan_column_info[i].hasGeneratedColumnFlag() + || table_scan_column_info[i].id == EXTRA_TABLE_ID_COLUMN_ID) + continue; + auto col = columns_to_read_map.at(table_scan_column_info[i].id); + RUNTIME_CHECK_MSG( + col.name == current_names_and_types[i].name, + "Column name mismatch, expect: {}, actual: {}", + col.name, + current_names_and_types[i].name); + columns_after_cast->push_back(col); + columns_after_cast->back().type = current_names_and_types[i].type; + } + } + + return std::make_shared( + rs_operator, + before_where, + project_after_where, + filter_columns, + filter_column_name, + extra_cast, + columns_after_cast); +} + +PushDownFilterPtr PushDownFilter::build( + const SelectQueryInfo & query_info, + const ColumnDefines & columns_to_read, + const ColumnDefines & table_column_defines, + const Context & context, + const LoggerPtr & tracing_logger) +{ + const auto & dag_query = query_info.dag_query; + if (unlikely(dag_query == nullptr)) + return EMPTY_FILTER; + + // build rough set operator + const auto rs_operator = RSOperator::build( + dag_query, + columns_to_read, + table_column_defines, + 
context.getSettingsRef().dt_enable_rough_set_filter, + tracing_logger); + // build push down filter + const auto & columns_to_read_info = dag_query->source_columns; + const auto & pushed_down_filters = dag_query->pushed_down_filters; + if (unlikely(context.getSettingsRef().force_push_down_all_filters_to_scan) && !dag_query->filters.empty()) + { + google::protobuf::RepeatedPtrField merged_filters{ + pushed_down_filters.begin(), + pushed_down_filters.end()}; + merged_filters.MergeFrom(dag_query->filters); + return PushDownFilter::build( + rs_operator, + columns_to_read_info, + merged_filters, + columns_to_read, + context, + tracing_logger); + } + return PushDownFilter::build( + rs_operator, + columns_to_read_info, + pushed_down_filters, + columns_to_read, + context, + tracing_logger); +} +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h b/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h index 97d69897e00..0b5a43af579 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h +++ b/dbms/src/Storages/DeltaMerge/Filter/PushDownFilter.h @@ -14,9 +14,15 @@ #pragma once +#include #include #include +namespace DB +{ +struct SelectQueryInfo; +} + namespace DB::DM { @@ -48,6 +54,23 @@ class PushDownFilter : rs_operator(rs_operator_) {} + // Use by StorageDisaggregated. + static PushDownFilterPtr build( + const DM::RSOperatorPtr & rs_operator, + const ColumnInfos & table_scan_column_info, + const google::protobuf::RepeatedPtrField & pushed_down_filters, + const ColumnDefines & columns_to_read, + const Context & context, + const LoggerPtr & tracing_logger); + + // Use by StorageDeltaMerge. 
+ static DM::PushDownFilterPtr build( + const SelectQueryInfo & query_info, + const ColumnDefines & columns_to_read, + const ColumnDefines & table_column_defines, + const Context & context, + const LoggerPtr & tracing_logger); + // Rough set operator RSOperatorPtr rs_operator; // Filter expression actions and the name of the tmp filter column diff --git a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp index 9928edb16b5..cc24a5948b9 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp +++ b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include namespace DB::DM { @@ -43,7 +46,41 @@ RSOperatorPtr createNot(const RSOperatorPtr & op) RSOperatorPtr createNotEqual(const Attr & attr, const Field & value) { return std::make_shared(attr, value); } RSOperatorPtr createOr(const RSOperators & children) { return std::make_shared(children); } RSOperatorPtr createIsNull(const Attr & attr) { return std::make_shared(attr);} -RSOperatorPtr createUnsupported(const String & content, const String & reason) { return std::make_shared(content, reason); } +RSOperatorPtr createUnsupported(const String & reason) { return std::make_shared(reason); } // clang-format on +RSOperatorPtr RSOperator::build( + const std::unique_ptr & dag_query, + const ColumnDefines & columns_to_read, + const ColumnDefines & table_column_defines, + bool enable_rs_filter, + const LoggerPtr & tracing_logger) +{ + RUNTIME_CHECK(dag_query != nullptr); + // build rough set operator + if (unlikely(!enable_rs_filter)) + { + LOG_DEBUG(tracing_logger, "Rough set filter is disabled."); + return EMPTY_RS_OPERATOR; + } + + /// Query from TiDB / TiSpark + auto create_attr_by_column_id = [&table_column_defines](ColumnID column_id) -> Attr { + 
auto iter = std::find_if( + table_column_defines.begin(), + table_column_defines.end(), + [column_id](const ColumnDefine & d) -> bool { return d.id == column_id; }); + if (iter != table_column_defines.end()) + return Attr{.col_name = iter->name, .col_id = iter->id, .type = iter->type}; + // Maybe throw an exception? Or check if `type` is nullptr before creating filter? + return Attr{.col_name = "", .col_id = column_id, .type = DataTypePtr{}}; + }; + DM::RSOperatorPtr rs_operator + = FilterParser::parseDAGQuery(*dag_query, columns_to_read, std::move(create_attr_by_column_id), tracing_logger); + if (likely(rs_operator != DM::EMPTY_RS_OPERATOR)) + LOG_DEBUG(tracing_logger, "Rough set filter: {}", rs_operator->toDebugString()); + + return rs_operator; +} + } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h index 4bf68ccbb68..40dcba49ce6 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h +++ b/dbms/src/Storages/DeltaMerge/Filter/RSOperator.h @@ -19,6 +19,11 @@ #include #include +namespace DB +{ +struct DAGQueryInfo; +} + namespace DB::DM { @@ -48,6 +53,13 @@ class RSOperator virtual RSResults roughCheck(size_t start_pack, size_t pack_count, const RSCheckParam & param) = 0; virtual ColIds getColumnIDs() = 0; + + static RSOperatorPtr build( + const std::unique_ptr & dag_query, + const ColumnDefines & columns_to_read, + const ColumnDefines & table_column_defines, + bool enable_rs_filter, + const LoggerPtr & tracing_logger); }; class ColCmpVal : public RSOperator @@ -151,6 +163,6 @@ RSOperatorPtr createLike(const Attr & attr, const Field & value); // RSOperatorPtr createIsNull(const Attr & attr); // -RSOperatorPtr createUnsupported(const String & content, const String & reason); +RSOperatorPtr createUnsupported(const String & reason); } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h b/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h index 
3b82bc27575..ee3d4f1b414 100644 --- a/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h +++ b/dbms/src/Storages/DeltaMerge/Filter/Unsupported.h @@ -21,23 +21,18 @@ namespace DB::DM class Unsupported : public RSOperator { - String content; String reason; public: - Unsupported(const String & content_, const String & reason_) - : content(content_) - , reason(reason_) + explicit Unsupported(const String & reason_) + : reason(reason_) {} String name() override { return "unsupported"; } ColIds getColumnIDs() override { return {}; } - String toDebugString() override - { - return fmt::format(R"({{"op":"{}","reason":"{}","content":"{}"}})", name(), reason, content); - } + String toDebugString() override { return fmt::format(R"({{"op":"{}","reason":"{}"}})", name(), reason); } RSResults roughCheck(size_t /*start_pack*/, size_t pack_count, const RSCheckParam & /*param*/) override { diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 0679e41a3cb..8541b5dc5b0 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -86,9 +86,10 @@ ColumnDefine getColumnDefineForColumnExpr(const tipb::Expr & expr, const ColumnD if (column_index < 0 || column_index >= static_cast(columns_to_read.size())) { throw TiFlashException( - "Column index out of bound: " + DB::toString(column_index) + ", should in [0," - + DB::toString(columns_to_read.size()) + ")", - Errors::Coprocessor::BadRequest); + Errors::Coprocessor::BadRequest, + "Column index out of bound: {}, should in [0,{})", + column_index, + columns_to_read.size()); } return columns_to_read[column_index]; } @@ -114,12 +115,10 @@ inline RSOperatorPtr parseTiCompareExpr( // const TimezoneInfo & timezone_info) { if (unlikely(expr.children_size() != 2 && filter_type != FilterParser::RSFilterType::In)) - return createUnsupported( - expr.ShortDebugString(), - fmt::format( - "{} 
with {} children is not supported", - tipb::ScalarFuncSig_Name(expr.sig()), - expr.children_size())); + return createUnsupported(fmt::format( + "{} with {} children is not supported", + tipb::ScalarFuncSig_Name(expr.sig()), + expr.children_size())); // Support three types of expression: // 1. op(column, literal), in sql: column op literal @@ -148,16 +147,21 @@ inline RSOperatorPtr parseTiCompareExpr( // if (column_expr_child_idx == -1) column_expr_child_idx = child_idx; else - return createUnsupported(expr.ShortDebugString(), "Multiple ColumnRef in expression is not supported"); + return createUnsupported(fmt::format( + "Multiple ColumnRef in expression is not supported, sig={}", + tipb::ScalarFuncSig_Name(expr.sig()))); if (unlikely(!child.has_field_type())) - return createUnsupported(expr.ShortDebugString(), "ColumnRef with no field type is not supported"); + return createUnsupported(fmt::format( + "ColumnRef with no field type is not supported, sig={}", + tipb::ScalarFuncSig_Name(expr.sig()))); auto field_type = child.field_type().tp(); if (!isRoughSetFilterSupportType(field_type)) - return createUnsupported( - expr.ShortDebugString(), - fmt::format("ColumnRef with field type({}) is not supported", field_type)); + return createUnsupported(fmt::format( + "ColumnRef with field type is not supported, sig={} field_type={}", + tipb::ScalarFuncSig_Name(expr.sig()), + field_type)); const auto col = getColumnDefineForColumnExpr(child, columns_to_read); attr = creator(col.id); @@ -169,9 +173,10 @@ inline RSOperatorPtr parseTiCompareExpr( // { auto literal_type = child.field_type().tp(); if (unlikely(literal_type != TiDB::TypeTimestamp && literal_type != TiDB::TypeDatetime)) - return createUnsupported( - expr.ShortDebugString(), - fmt::format("Compare timestamp column with literal type({}) is not supported", literal_type)); + return createUnsupported(fmt::format( + "Compare timestamp column with literal type is not supported, sig={} literal_type={}", + 
tipb::ScalarFuncSig_Name(expr.sig()), + literal_type)); // convert literal value from timezone specified in cop request to UTC if (literal_type == TiDB::TypeDatetime && !timezone_info.is_utc_timezone) convertFieldWithTimezone(value, timezone_info); @@ -182,25 +187,25 @@ inline RSOperatorPtr parseTiCompareExpr( // { // Any other type of child is not supported, like: ScalarFunc. // case like `cast(a as signed) > 1`, `a in (0, cast(a as signed))` is not supported. - return createUnsupported( - expr.ShortDebugString(), - fmt::format("Unknown child type: {}", tipb::ExprType_Name(child.tp()))); + return createUnsupported(fmt::format( + "Unknown child type, sig={} child_tp={}", + tipb::ScalarFuncSig_Name(expr.sig()), + tipb::ExprType_Name(child.tp()))); } } // At least one ColumnRef and one Literal if (unlikely(column_expr_child_idx == -1)) - return createUnsupported(expr.ShortDebugString(), "No ColumnRef in expression"); + return createUnsupported("No ColumnRef in expression"); if (unlikely(values.empty())) - return createUnsupported(expr.ShortDebugString(), "No Literal in expression"); + return createUnsupported("No Literal in expression"); // For compare expression, only support one Literal if (unlikely(values.size() > 1 && filter_type != FilterParser::RSFilterType::In)) return createUnsupported( - expr.ShortDebugString(), - fmt::format("Multiple Literal in compare expression is not supported, size: {}", values.size())); + fmt::format("Multiple Literal in compare expression is not supported, size={}", values.size())); // For In type, the first child must be ColumnRef if (column_expr_child_idx != 0 && filter_type == FilterParser::RSFilterType::In) - return createUnsupported(expr.ShortDebugString(), "the first child of In expression must be ColumnRef"); + return createUnsupported("the first child of In expression must be ColumnRef"); bool inverse_cmp = column_expr_child_idx == 1; switch (filter_type) @@ -232,9 +237,7 @@ inline RSOperatorPtr parseTiCompareExpr( // case 
FilterParser::RSFilterType::In: return createIn(attr, values); default: - return createUnsupported( - expr.ShortDebugString(), - fmt::format("Unknown compare type: {}", tipb::ExprType_Name(expr.tp()))); + return createUnsupported(fmt::format("Unknown compare type: {}", tipb::ExprType_Name(expr.tp()))); } } @@ -246,9 +249,11 @@ RSOperatorPtr parseTiExpr( const LoggerPtr & log) { if (unlikely(!isFunctionExpr(expr))) - return createUnsupported(expr.ShortDebugString(), "child of logical and is not function"); + return createUnsupported( + fmt::format("child of logical and is not function, expr.tp={}", tipb::ExprType_Name(expr.tp()))); if (unlikely(isAggFunctionExpr(expr))) - return createUnsupported(expr.ShortDebugString(), "agg function: " + tipb::ExprType_Name(expr.tp())); + return createUnsupported( + fmt::format("agg function is not support, expr.tp={}", tipb::ExprType_Name(expr.tp()))); String reason = fmt::format("{} is not supported", tipb::ScalarFuncSig_Name(expr.sig())); if (auto iter = FilterParser::scalar_func_rs_filter_map.find(expr.sig()); @@ -264,13 +269,15 @@ RSOperatorPtr parseTiExpr( case FilterParser::RSFilterType::Not: { if (unlikely(expr.children_size() != 1)) - { - reason = fmt::format("logical not with {} children is not supported", expr.children_size()); - break; - } + return createUnsupported( + fmt::format("logical not with {} children is not supported", expr.children_size())); + if (const auto & child = expr.children(0); likely(isFunctionExpr(child))) return createNot(parseTiExpr(child, columns_to_read, creator, timezone_info, log)); - reason = "child of logical not is not function"; + else + return createUnsupported(fmt::format( + "child of logical not is not function, child_type={}", + tipb::ExprType_Name(child.tp()))); break; } @@ -283,8 +290,9 @@ RSOperatorPtr parseTiExpr( if (likely(isFunctionExpr(child))) children.emplace_back(parseTiExpr(child, columns_to_read, creator, timezone_info, log)); else - children.emplace_back( - 
createUnsupported(child.ShortDebugString(), "child of logical operator is not function")); + children.emplace_back(createUnsupported(fmt::format( + "child of logical operator is not function, child_type={}", + tipb::ExprType_Name(child.tp())))); } if (expr.sig() == tipb::ScalarFuncSig::LogicalAnd) return createAnd(children); @@ -312,9 +320,10 @@ RSOperatorPtr parseTiExpr( // but in RSResult (a > 1), we will get the result RSResult::None, and then we think the result is the empty set. if (unlikely(expr.children_size() != 1)) { - reason = fmt::format("filter IsNull with {} children is not supported", expr.children_size()); - break; + return createUnsupported( + fmt::format("filter IsNull with {} children is not supported", expr.children_size())); } + const auto & child = expr.children(0); if (likely(isColumnExpr(child))) { @@ -325,21 +334,23 @@ RSOperatorPtr parseTiExpr( Attr attr = creator(col.id); return createIsNull(attr); } - reason = fmt::format("ColumnRef with field type({}) is not supported", tipb::ExprType_Name(expr.tp())); + return createUnsupported( + fmt::format("ColumnRef with field type is not supported, filed_type={}", field_type)); } else { - reason = "child of IsNull is not ColumnRef"; + return createUnsupported( + fmt::format("child of IsNull is not ColumnRef, expr_tp={}", tipb::ExprType_Name(child.tp()))); } break; } // Unsupported filter type: case FilterParser::RSFilterType::Like: case FilterParser::RSFilterType::Unsupported: - break; + return createUnsupported(reason); } } - return createUnsupported(expr.ShortDebugString(), reason); + return createUnsupported(reason); } } // namespace cop @@ -383,7 +394,8 @@ RSOperatorPtr FilterParser::parseRFInExpr( case tipb::IN: { if (!isColumnExpr(target_expr)) - return createUnsupported(target_expr.ShortDebugString(), "rf target expr is not column expr"); + return createUnsupported( + fmt::format("rf target expr is not column expr, expr.tp={}", tipb::ExprType_Name(target_expr.tp()))); auto column_define = 
cop::getColumnDefineForColumnExpr(target_expr, columns_to_read); auto attr = Attr{.col_name = column_define.name, .col_id = column_define.id, .type = column_define.type}; if (target_expr.field_type().tp() == TiDB::TypeTimestamp && !timezone_info.is_utc_timezone) @@ -405,7 +417,7 @@ RSOperatorPtr FilterParser::parseRFInExpr( } case tipb::MIN_MAX: case tipb::BLOOM_FILTER: - return createUnsupported(target_expr.ShortDebugString(), "function params should be in predicate"); + return createUnsupported("function params should be in predicate"); } } diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index def138d1f66..25bc366b5ce 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -29,7 +29,6 @@ #include #include - namespace DB::DM { @@ -90,6 +89,16 @@ inline std::pair minmax( return {batch_min_idx, batch_max_idx}; } + +// Before v6.4.0, we used null as the minimum value. +// Since v6.4.0, we have excluded null when calculating the maximum and minimum values. +// If the minimum value is null, this minmax index is generated before v6.4.0. +// For compatibility, the filter result of the corresponding pack should be Some, +// and the upper layer will read the pack data to perform the filter calculation. 
+ALWAYS_INLINE bool minIsNull(const DB::ColumnUInt8 & null_map, size_t i) +{ + return null_map.getElement(i * 2); +} } // namespace details void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * del_mark) @@ -130,15 +139,15 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de if (min_index != NONE_EXIST) { - has_null_marks->push_back(has_null); - has_value_marks->push_back(1); + has_null_marks.push_back(has_null); + has_value_marks.push_back(1); minmaxes->insertFrom(column, min_index); minmaxes->insertFrom(column, max_index); } else { - has_null_marks->push_back(has_null); - has_value_marks->push_back(0); + has_null_marks.push_back(has_null); + has_value_marks.push_back(0); minmaxes->insertDefault(); minmaxes->insertDefault(); } @@ -146,10 +155,10 @@ void MinMaxIndex::addPack(const IColumn & column, const ColumnVector * de void MinMaxIndex::write(const IDataType & type, WriteBuffer & buf) { - UInt64 size = has_null_marks->size(); + UInt64 size = has_null_marks.size(); DB::writeIntBinary(size, buf); - buf.write(reinterpret_cast(has_null_marks->data()), sizeof(UInt8) * size); - buf.write(reinterpret_cast(has_value_marks->data()), sizeof(UInt8) * size); + buf.write(reinterpret_cast(has_null_marks.data()), sizeof(UInt8) * size); + buf.write(reinterpret_cast(has_value_marks.data()), sizeof(UInt8) * size); type.serializeBinaryBulkWithMultipleStreams( *minmaxes, // [&](const IDataType::SubstreamPath &) { return &buf; }, @@ -167,11 +176,11 @@ MinMaxIndexPtr MinMaxIndex::read(const IDataType & type, ReadBuffer & buf, size_ { DB::readIntBinary(size, buf); } - auto has_null_marks = std::make_shared>(size); - auto has_value_marks = std::make_shared>(size); + PaddedPODArray has_null_marks(size); + PaddedPODArray has_value_marks(size); auto minmaxes = type.createColumn(); - buf.read(reinterpret_cast(has_null_marks->data()), sizeof(UInt8) * size); - buf.read(reinterpret_cast(has_value_marks->data()), sizeof(UInt8) * size); + 
buf.read(reinterpret_cast(has_null_marks.data()), sizeof(UInt8) * size); + buf.read(reinterpret_cast(has_value_marks.data()), sizeof(UInt8) * size); type.deserializeBinaryBulkWithMultipleStreams( *minmaxes, // [&](const IDataType::SubstreamPath &) { return &buf; }, @@ -187,8 +196,7 @@ MinMaxIndexPtr MinMaxIndex::read(const IDataType & type, ReadBuffer & buf, size_ + " vs. actual: " + std::to_string(bytes_read), Errors::DeltaTree::Internal); } - // NOLINTNEXTLINE (call private constructor of MinMaxIndex to build shared_ptr) - return MinMaxIndexPtr(new MinMaxIndex(has_null_marks, has_value_marks, std::move(minmaxes))); + return std::make_shared(std::move(has_null_marks), std::move(has_value_marks), std::move(minmaxes)); } std::pair MinMaxIndex::getIntMinMax(size_t pack_index) @@ -230,8 +238,7 @@ RSResults MinMaxIndex::checkNullableInImpl( const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - // if min is null, result is Some - if (null_map.getElement(i * 2)) + if (details::minIsNull(null_map, i)) continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; @@ -276,8 +283,7 @@ RSResults MinMaxIndex::checkNullableIn( const auto & offsets = string_column->getOffsets(); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - bool min_is_null = null_map.getElement(i * 2); - if (min_is_null) + if (details::minIsNull(null_map, i)) continue; size_t pos = i * 2; size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; @@ -325,7 +331,7 @@ RSResults MinMaxIndex::checkInImpl( const auto & minmaxes_data = toColumnVectorData(minmaxes); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if (!(*has_value_marks)[i]) + if (!has_value_marks[i]) continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; @@ -365,7 +371,7 @@ RSResults MinMaxIndex::checkIn( const auto & offsets = string_column->getOffsets(); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if (!(*has_value_marks)[i]) + if (!has_value_marks[i]) continue; size_t pos = i * 2; size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; @@ -396,7 +402,7 @@ RSResults MinMaxIndex::checkCmpImpl(size_t start_pack, size_t pack_count, const const auto & minmaxes_data = toColumnVectorData(minmaxes); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if (!(*has_value_marks)[i]) + if (!has_value_marks[i]) continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; @@ -434,7 +440,7 @@ RSResults MinMaxIndex::checkCmp(size_t start_pack, size_t pack_count, const Fiel const auto & offsets = string_column->getOffsets(); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if (!(*has_value_marks)[i]) + if (!has_value_marks[i]) continue; size_t pos = i * 2; size_t prev_offset = pos == 0 ? 
0 : offsets[pos - 1]; @@ -487,8 +493,7 @@ RSResults MinMaxIndex::checkNullableCmpImpl( const auto & minmaxes_data = toColumnVectorData(column_nullable.getNestedColumnPtr()); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - // if min is null, result is Some - if (null_map.getElement(i * 2)) + if (details::minIsNull(null_map, i)) continue; auto min = minmaxes_data[i * 2]; auto max = minmaxes_data[i * 2 + 1]; @@ -534,7 +539,7 @@ RSResults MinMaxIndex::checkNullableCmp( const auto & offsets = string_column->getOffsets(); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if (null_map.getElement(i * 2)) + if (details::minIsNull(null_map, i)) continue; size_t pos = i * 2; size_t prev_offset = pos == 0 ? 0 : offsets[pos - 1]; @@ -571,20 +576,19 @@ RSResults MinMaxIndex::checkNullableCmp( return results; } +// If a pack only contains null marks and delete marks, checkIsNull will return RSResult::All. +// This is safe because MVCC will read the tag column and the deleted rows will be filtered out. RSResults MinMaxIndex::checkIsNull(size_t start_pack, size_t pack_count) { RSResults results(pack_count, RSResult::None); for (size_t i = start_pack; i < start_pack + pack_count; ++i) { - if ((*has_null_marks)[i]) - results[i - start_pack] = RSResult::Some; + if (has_null_marks[i]) + { + results[i - start_pack] = has_value_marks[i] ? 
RSResult::Some : RSResult::All; + } } return results; } -String MinMaxIndex::toString() -{ - return ""; -} - } // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h index 5fee21f0f69..9f617c2969b 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.h @@ -18,44 +18,30 @@ #include #include #include -#include #include #include #include #include -namespace DB -{ -namespace DM +namespace DB::DM { class MinMaxIndex; using MinMaxIndexPtr = std::shared_ptr; class MinMaxIndex { -private: - using HasValueMarkPtr = std::shared_ptr>; - using HasNullMarkPtr = std::shared_ptr>; - - HasNullMarkPtr has_null_marks; - HasValueMarkPtr has_value_marks; - MutableColumnPtr minmaxes; - public: -#ifndef DBMS_PUBLIC_GTEST -private: -#endif - MinMaxIndex(HasNullMarkPtr has_null_marks_, HasValueMarkPtr has_value_marks_, MutableColumnPtr && minmaxes_) - : has_null_marks(has_null_marks_) - , has_value_marks(has_value_marks_) + MinMaxIndex( + PaddedPODArray && has_null_marks_, + PaddedPODArray && has_value_marks_, + MutableColumnPtr && minmaxes_) + : has_null_marks(std::move(has_null_marks_)) + , has_value_marks(std::move(has_value_marks_)) , minmaxes(std::move(minmaxes_)) {} -public: explicit MinMaxIndex(const IDataType & type) - : has_null_marks(std::make_shared>()) - , has_value_marks(std::make_shared>()) - , minmaxes(type.createColumn()) + : minmaxes(type.createColumn()) {} size_t byteSize() const @@ -63,7 +49,7 @@ class MinMaxIndex // we add 3 * sizeof(PaddedPODArray) // because has_null_marks/ has_value_marks / minmaxes are all use PaddedPODArray // Thus we need to add the structual memory cost of PaddedPODArray for each of them - return sizeof(UInt8) * has_null_marks->size() + sizeof(UInt8) * has_value_marks->size() + minmaxes->byteSize() + return sizeof(UInt8) * has_null_marks.size() + sizeof(UInt8) * has_value_marks.size() + minmaxes->byteSize() + 
3 * sizeof(PaddedPODArray); } @@ -83,6 +69,17 @@ class MinMaxIndex template RSResults checkCmp(size_t start_pack, size_t pack_count, const Field & value, const DataTypePtr & type); + + // TODO: merge with checkCmp + RSResults checkIn( + size_t start_pack, + size_t pack_count, + const std::vector & values, + const DataTypePtr & type); + + RSResults checkIsNull(size_t start_pack, size_t pack_count); + +private: template RSResults checkCmpImpl(size_t start_pack, size_t pack_count, const Field & value, const DataTypePtr & type); template @@ -96,12 +93,6 @@ class MinMaxIndex const Field & value, const DataTypePtr & type); - // TODO: merge with checkCmp - RSResults checkIn( - size_t start_pack, - size_t pack_count, - const std::vector & values, - const DataTypePtr & type); template RSResults checkInImpl( size_t start_pack, @@ -122,12 +113,11 @@ class MinMaxIndex const std::vector & values, const DataTypePtr & type); - RSResults checkIsNull(size_t start_pack, size_t pack_count); - - static String toString(); + PaddedPODArray has_null_marks; + PaddedPODArray has_value_marks; + MutableColumnPtr minmaxes; }; - struct MinMaxIndexWeightFunction { size_t operator()(const String & key, const MinMaxIndex & index) const @@ -150,7 +140,6 @@ struct MinMaxIndexWeightFunction } }; - class MinMaxIndexCache : public LRUCache, MinMaxIndexWeightFunction> { private: @@ -171,6 +160,4 @@ class MinMaxIndexCache : public LRUCache, using MinMaxIndexCachePtr = std::shared_ptr; -} // namespace DM - -} // namespace DB +} // namespace DB::DM diff --git a/dbms/src/Storages/DeltaMerge/Index/RSIndex.h b/dbms/src/Storages/DeltaMerge/Index/RSIndex.h index 1cfa9aebf31..7ff94a2f962 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RSIndex.h +++ b/dbms/src/Storages/DeltaMerge/Index/RSIndex.h @@ -16,40 +16,19 @@ #include -namespace DB +namespace DB::DM { -namespace DM -{ -class EqualIndex; -using EqualIndexPtr = std::shared_ptr; - - -class EqualIndex -{ -public: - virtual ~EqualIndex() = default; -}; - struct 
RSIndex { DataTypePtr type; MinMaxIndexPtr minmax; - EqualIndexPtr equal; RSIndex(const DataTypePtr & type_, const MinMaxIndexPtr & minmax_) : type(type_) , minmax(minmax_) {} - - RSIndex(const DataTypePtr & type_, const MinMaxIndexPtr & minmax_, const EqualIndexPtr & equal_) - : type(type_) - , minmax(minmax_) - , equal(equal_) - {} }; using ColumnIndexes = std::unordered_map; -} // namespace DM - -} // namespace DB \ No newline at end of file +} // namespace DB::DM \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/Index/RSResult.h b/dbms/src/Storages/DeltaMerge/Index/RSResult.h index 9617f900dad..21c1cad6911 100644 --- a/dbms/src/Storages/DeltaMerge/Index/RSResult.h +++ b/dbms/src/Storages/DeltaMerge/Index/RSResult.h @@ -75,6 +75,10 @@ inline RSResult operator&&(RSResult v0, RSResult v1) return Some; } +ALWAYS_INLINE inline bool isUse(RSResult res) noexcept +{ + return res != RSResult::None; +} } // namespace DM } // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/DeltaMerge/ScanContext.cpp b/dbms/src/Storages/DeltaMerge/ScanContext.cpp index 8dae573e5d8..5a5b126a40e 100644 --- a/dbms/src/Storages/DeltaMerge/ScanContext.cpp +++ b/dbms/src/Storages/DeltaMerge/ScanContext.cpp @@ -104,6 +104,13 @@ String ScanContext::toJson() const json->set("dmfile_lm_filter_skipped_rows", dmfile_lm_filter_skipped_rows.load()); json->set("dmfile_read_time", fmt::format("{:.3f}ms", total_dmfile_read_time_ns.load() / NS_TO_MS_SCALE)); + json->set( + "rs_pack_filter_check_time", + fmt::format("{:.3f}ms", total_rs_pack_filter_check_time_ns.load() / NS_TO_MS_SCALE)); + json->set("rs_pack_filter_none", rs_pack_filter_none.load()); + json->set("rs_pack_filter_some", rs_pack_filter_some.load()); + json->set("rs_pack_filter_all", rs_pack_filter_all.load()); + json->set("num_remote_region", total_remote_region_num.load()); json->set("num_local_region", total_local_region_num.load()); json->set("num_stale_read", num_stale_read.load()); diff --git 
a/dbms/src/Storages/DeltaMerge/ScanContext.h b/dbms/src/Storages/DeltaMerge/ScanContext.h index 78321dd8737..fdf26eb5200 100644 --- a/dbms/src/Storages/DeltaMerge/ScanContext.h +++ b/dbms/src/Storages/DeltaMerge/ScanContext.h @@ -40,10 +40,13 @@ class ScanContext std::atomic dmfile_mvcc_skipped_rows{0}; std::atomic dmfile_lm_filter_scanned_rows{0}; std::atomic dmfile_lm_filter_skipped_rows{0}; - - std::atomic total_dmfile_rough_set_index_check_time_ns{0}; std::atomic total_dmfile_read_time_ns{0}; + std::atomic total_rs_pack_filter_check_time_ns{0}; + std::atomic rs_pack_filter_none{0}; + std::atomic rs_pack_filter_some{0}; + std::atomic rs_pack_filter_all{0}; + std::atomic total_remote_region_num{0}; std::atomic total_local_region_num{0}; std::atomic num_stale_read{0}; @@ -75,7 +78,6 @@ class ScanContext std::atomic mvcc_output_rows{0}; std::atomic late_materialization_skip_rows{0}; - // TODO: filter // Learner read std::atomic learner_read_ns{0}; // Create snapshot from PageStorage @@ -98,7 +100,8 @@ class ScanContext dmfile_mvcc_skipped_rows = tiflash_scan_context_pb.dmfile_mvcc_skipped_rows(); dmfile_lm_filter_scanned_rows = tiflash_scan_context_pb.dmfile_lm_filter_scanned_rows(); dmfile_lm_filter_skipped_rows = tiflash_scan_context_pb.dmfile_lm_filter_skipped_rows(); - total_dmfile_rough_set_index_check_time_ns = tiflash_scan_context_pb.total_dmfile_rs_check_ms() * 1000000; + total_rs_pack_filter_check_time_ns = tiflash_scan_context_pb.total_dmfile_rs_check_ms() * 1000000; + // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all total_dmfile_read_time_ns = tiflash_scan_context_pb.total_dmfile_read_ms() * 1000000; create_snapshot_time_ns = tiflash_scan_context_pb.total_build_snapshot_ms() * 1000000; total_remote_region_num = tiflash_scan_context_pb.remote_regions(); @@ -140,7 +143,8 @@ class ScanContext tiflash_scan_context_pb.set_dmfile_mvcc_skipped_rows(dmfile_mvcc_skipped_rows); 
tiflash_scan_context_pb.set_dmfile_lm_filter_scanned_rows(dmfile_lm_filter_scanned_rows); tiflash_scan_context_pb.set_dmfile_lm_filter_skipped_rows(dmfile_lm_filter_skipped_rows); - tiflash_scan_context_pb.set_total_dmfile_rs_check_ms(total_dmfile_rough_set_index_check_time_ns / 1000000); + tiflash_scan_context_pb.set_total_dmfile_rs_check_ms(total_rs_pack_filter_check_time_ns / 1000000); + // TODO: pack_filter_none, pack_filter_some, pack_filter_all tiflash_scan_context_pb.set_total_dmfile_read_ms(total_dmfile_read_time_ns / 1000000); tiflash_scan_context_pb.set_total_build_snapshot_ms(create_snapshot_time_ns / 1000000); tiflash_scan_context_pb.set_remote_regions(total_remote_region_num); @@ -182,7 +186,10 @@ class ScanContext dmfile_mvcc_skipped_rows += other.dmfile_mvcc_skipped_rows; dmfile_lm_filter_scanned_rows += other.dmfile_lm_filter_scanned_rows; dmfile_lm_filter_skipped_rows += other.dmfile_lm_filter_skipped_rows; - total_dmfile_rough_set_index_check_time_ns += other.total_dmfile_rough_set_index_check_time_ns; + total_rs_pack_filter_check_time_ns += other.total_rs_pack_filter_check_time_ns; + rs_pack_filter_none += other.rs_pack_filter_none; + rs_pack_filter_some += other.rs_pack_filter_some; + rs_pack_filter_all += other.rs_pack_filter_all; total_dmfile_read_time_ns += other.total_dmfile_read_time_ns; total_local_region_num += other.total_local_region_num; @@ -227,7 +234,8 @@ class ScanContext dmfile_mvcc_skipped_rows += other.dmfile_mvcc_skipped_rows(); dmfile_lm_filter_scanned_rows += other.dmfile_lm_filter_scanned_rows(); dmfile_lm_filter_skipped_rows += other.dmfile_lm_filter_skipped_rows(); - total_dmfile_rough_set_index_check_time_ns += other.total_dmfile_rs_check_ms() * 1000000; + total_rs_pack_filter_check_time_ns += other.total_dmfile_rs_check_ms() * 1000000; + // TODO: rs_pack_filter_none, rs_pack_filter_some, rs_pack_filter_all total_dmfile_read_time_ns += other.total_dmfile_read_ms() * 1000000; create_snapshot_time_ns += 
other.total_build_snapshot_ms() * 1000000; total_local_region_num += other.local_regions(); diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index f5fc2a0a654..07dc45c0dd8 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -2945,7 +2945,7 @@ std::pair, std::vector> parseDMFilePackInfo( dm_context.global_context.getReadLimiter(), dm_context.scan_context, dm_context.tracing_id); - const auto & use_packs = pack_filter.getUsePacksConst(); + const auto & pack_res = pack_filter.getPackResConst(); const auto & handle_res = pack_filter.getHandleRes(); const auto & pack_stats = dmfile->getPackStats(); @@ -2955,7 +2955,7 @@ std::pair, std::vector> parseDMFilePackInfo( { const auto & pack_stat = pack_stats[pack_id]; preceded_rows += pack_stat.rows; - if (!use_packs[pack_id]) + if (!isUse(pack_res[pack_id])) { continue; } diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 9abaf8d14b5..bcf1b3d4058 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp index fe11a7ce203..a508170ce8c 100644 --- a/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp +++ b/dbms/src/Storages/DeltaMerge/StableValueSpace.cpp @@ -440,21 +440,22 @@ void StableValueSpace::calculateStableProperty( context.getReadLimiter(), context.scan_context, context.tracing_id); - const auto & use_packs = pack_filter.getUsePacksConst(); + const auto & pack_res = pack_filter.getPackResConst(); size_t new_pack_properties_index = 0; const bool use_new_pack_properties = pack_properties.property_size() == 0; if (use_new_pack_properties) { - const size_t use_packs_count = std::count(use_packs.begin(), use_packs.end(), true); + const size_t use_packs_count 
= pack_filter.countUsePack(); + RUNTIME_CHECK_MSG( static_cast(new_pack_properties.property_size()) == use_packs_count, "size doesn't match, new_pack_properties_size={} use_packs_size={}", new_pack_properties.property_size(), use_packs_count); } - for (size_t pack_id = 0; pack_id < use_packs.size(); ++pack_id) + for (size_t pack_id = 0; pack_id < pack_res.size(); ++pack_id) { - if (!use_packs[pack_id]) + if (!isUse(pack_res[pack_id])) continue; property.num_versions += pack_stats[pack_id].rows; property.num_puts += pack_stats[pack_id].rows - pack_stats[pack_id].not_clean; @@ -589,10 +590,10 @@ RowsAndBytes StableValueSpace::Snapshot::getApproxRowsAndBytes(const DMContext & context.scan_context, context.tracing_id); const auto & pack_stats = f->getPackStats(); - const auto & use_packs = filter.getUsePacksConst(); + const auto & pack_res = filter.getPackResConst(); for (size_t i = 0; i < pack_stats.size(); ++i) { - if (use_packs[i]) + if (isUse(pack_res[i])) { ++match_packs; total_match_rows += pack_stats[i].rows; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index ba2627fbe93..d77c365492b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -168,9 +168,9 @@ class DMFileMetaV2Test : public DB::base::TiFlashStorageTestBasic ASSERT_EQ(n, s.size()); } - static std::vector & getReaderUsePacks(DMFileBlockInputStreamPtr & stream) + static RSResults & getReaderPackRes(DMFileBlockInputStreamPtr & stream) { - return stream->reader.pack_filter.getUsePacks(); + return stream->reader.pack_filter.getPackRes(); } protected: @@ -905,10 +905,10 @@ try auto stream = builder.setColumnCache(column_cache) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); - auto & use_packs = getReaderUsePacks(stream); - use_packs[1] = false; + auto & pack_res = getReaderPackRes(stream); + pack_res[1] = RSResult::None; 
stream->skipNextBlock(); - use_packs[1] = true; + pack_res[1] = RSResult::Some; std::vector partial_expect_arr_values; partial_expect_arr_values.insert( partial_expect_arr_values.cend(), @@ -1117,10 +1117,10 @@ try auto stream = builder.setColumnCache(column_cache) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}, std::make_shared()); - auto & use_packs = getReaderUsePacks(stream); - use_packs[1] = false; + auto & pack_res = getReaderPackRes(stream); + pack_res[1] = RSResult::None; ASSERT_EQ(stream->skipNextBlock(), num_rows_write / 3); - use_packs[1] = true; + pack_res[1] = RSResult::Some; ASSERT_INPUTSTREAM_COLS_UR( stream, Strings({DMTestEnv::pk_name}), @@ -1640,10 +1640,10 @@ try filters.emplace_back(one_part_filter, span_per_part); // only first part // // (first range) And (Unsuppported) -> should filter some chunks by range - filters.emplace_back(createAnd({one_part_filter, createUnsupported("test", "test")}), span_per_part); + filters.emplace_back(createAnd({one_part_filter, createUnsupported("test")}), span_per_part); // // (first range) Or (Unsupported) -> should NOT filter any chunk - filters.emplace_back(createOr({one_part_filter, createUnsupported("test", "test")}), num_rows_write); + filters.emplace_back(createOr({one_part_filter, createUnsupported("test")}), num_rows_write); auto test_read_filter = [&](const DM::RSOperatorPtr & filter, const size_t num_rows_should_read) { // Test read DMFileBlockInputStreamBuilder builder(dbContext()); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index ca70aba1c60..6c39ebf6737 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -23,15 +23,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include -#include #include +#include #include namespace DB::DM::tests @@ -40,10 
+42,10 @@ namespace DB::DM::tests static const ColId DEFAULT_COL_ID = 0; static const String DEFAULT_COL_NAME = "2020-09-26"; -class DMMinMaxIndexTest : public ::testing::Test +class MinMaxIndexTest : public ::testing::Test { public: - DMMinMaxIndexTest() = default; + MinMaxIndexTest() = default; protected: static void SetUpTestCase() {} @@ -53,13 +55,12 @@ class DMMinMaxIndexTest : public ::testing::Test context = DMTestEnv::getContext(); if (!context->getMinMaxIndexCache()) { - context->setMinMaxIndexCache(5368709120); + context->setMinMaxIndexCache(DEFAULT_MARK_CACHE_SIZE); } } void TearDown() override { context->dropMinMaxIndexCache(); } -private: protected: // a ptr to context, we can reload context with different settings if need. ContextPtr context; @@ -86,7 +87,7 @@ bool checkMatch( bool is_common_handle = false, bool check_pk = false) { - String name = "DMMinMaxIndexTest_" + test_case; + String name = "MinMaxIndexTest_" + test_case; // We cannot restore tables with the same table id multiple times in a single run. // Because we don't update max_page_id for PS instance at run time. // And when restoring table, it will use the max_page_id from PS as the start point for allocating page id. 
@@ -1180,7 +1181,7 @@ RSOperatorPtr generateRSOperator(MinMaxTestDatatype data_type, MinMaxTestOperato } } -TEST_F(DMMinMaxIndexTest, Equal) +TEST_F(MinMaxIndexTest, Equal) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1310,7 +1311,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, Not) +TEST_F(MinMaxIndexTest, Not) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1441,7 +1442,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, And) +TEST_F(MinMaxIndexTest, And) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1603,7 +1604,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, Or) +TEST_F(MinMaxIndexTest, Or) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1739,7 +1740,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, IsNull) +TEST_F(MinMaxIndexTest, IsNull) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1870,7 +1871,7 @@ try CATCH -TEST_F(DMMinMaxIndexTest, checkPKMatch) +TEST_F(MinMaxIndexTest, checkPKMatch) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1882,7 +1883,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, DelMark) +TEST_F(MinMaxIndexTest, DelMark) try { const auto * case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); @@ -1927,7 +1928,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, Enum8ValueCompare) +TEST_F(MinMaxIndexTest, Enum8ValueCompare) try { DataTypeEnum8::Values values; @@ -1972,7 +1973,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, Enum16ValueCompare) +TEST_F(MinMaxIndexTest, Enum16ValueCompare) try { DataTypeEnum16::Values values; @@ -2020,7 +2021,7 @@ try CATCH // Check it compatible with the minmax index generated by the version before v6.4 -TEST_F(DMMinMaxIndexTest, CompatibleOldMinmaxIndex) +TEST_F(MinMaxIndexTest, CompatibleOldMinmaxIndex) try { 
RSCheckParam param; @@ -2029,8 +2030,8 @@ try auto data_type = makeNullable(type); // Generate a minmax index with the min value is null as a old version(before v6.4) minmax index. - auto has_null_marks = std::make_shared>(1); - auto has_value_marks = std::make_shared>(1); + PaddedPODArray has_null_marks(1); + PaddedPODArray has_value_marks(1); MutableColumnPtr minmaxes = data_type->createColumn(); auto column = data_type->createColumn(); @@ -2042,7 +2043,8 @@ try minmaxes->insertFrom(*col, 1); // insert min index minmaxes->insertFrom(*col, 0); // insert max index - auto minmax = std::make_shared(has_null_marks, has_value_marks, std::move(minmaxes)); + auto minmax + = std::make_shared(std::move(has_null_marks), std::move(has_value_marks), std::move(minmaxes)); auto index = RSIndex(data_type, minmax); param.indexes.emplace(DEFAULT_COL_ID, index); @@ -2054,7 +2056,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, InOrNotInNULL) +TEST_F(MinMaxIndexTest, InOrNotInNULL) try { RSCheckParam param; @@ -2062,8 +2064,8 @@ try auto type = std::make_shared(); auto data_type = makeNullable(type); - auto has_null_marks = std::make_shared>(1); - auto has_value_marks = std::make_shared>(1); + PaddedPODArray has_null_marks(1); + PaddedPODArray has_value_marks(1); MutableColumnPtr minmaxes = data_type->createColumn(); auto column = data_type->createColumn(); @@ -2076,7 +2078,8 @@ try minmaxes->insertFrom(*col, 0); // insert min index minmaxes->insertFrom(*col, 1); // insert max index - auto minmax = std::make_shared(has_null_marks, has_value_marks, std::move(minmaxes)); + auto minmax + = std::make_shared(std::move(has_null_marks), std::move(has_value_marks), std::move(minmaxes)); auto index = RSIndex(data_type, minmax); param.indexes.emplace(DEFAULT_COL_ID, index); @@ -2114,7 +2117,7 @@ try } CATCH -TEST_F(DMMinMaxIndexTest, ParseIn) +TEST_F(MinMaxIndexTest, ParseIn) try { const google::protobuf::RepeatedPtrField pushed_down_filters{}; @@ -2250,17 +2253,53 @@ try }; const auto op = 
DB::DM::FilterParser::parseDAGQuery(*dag_query, columns_to_read, create_attr_by_column_id, Logger::get()); - ASSERT_EQ( + EXPECT_EQ( op->toDebugString(), - "{\"op\":\"and\",\"children\":[{\"op\":\"in\",\"col\":\"b\",\"value\":\"[\"1\",\"2\"]},{\"op\":\"unsupported\"," - "\"reason\":\"Multiple ColumnRef in expression is not supported\",\"content\":\"tp: ScalarFunc children { tp: " - "ColumnRef val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 flag: 0 } } children { tp: " - "Int64 val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" } children { tp: ColumnRef val: " - "\"\\200\\000\\000\\000\\000\\000\\000\\002\" field_type { tp: 1 flag: 0 } } sig: " - "InInt\"},{\"op\":\"unsupported\",\"reason\":\"Multiple ColumnRef in expression is not " - "supported\",\"content\":\"tp: ScalarFunc children { tp: ColumnRef val: " - "\"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 flag: 0 } } children { tp: ColumnRef val: " - "\"\\200\\000\\000\\000\\000\\000\\000\\002\" field_type { tp: 1 flag: 0 } } sig: InInt\"}]}"); + R"raw({"op":"and","children":[{"op":"in","col":"b","value":"["1","2"]},{"op":"unsupported","reason":"Multiple ColumnRef in expression is not supported, sig=InInt"},{"op":"unsupported","reason":"Multiple ColumnRef in expression is not supported, sig=InInt"}]})raw"); +} +CATCH + +TEST_F(MinMaxIndexTest, CheckIsNull) +try +{ + struct IsNullTestCase + { + std::vector> column_data; + std::vector del_mark; + RSResult result; + }; + + std::vector cases = { + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 0}, RSResult::Some}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 0, 0}, RSResult::None}, + {{std::nullopt, std::nullopt}, {0, 0}, RSResult::All}, + {{1, 2, 3, 4, std::nullopt}, {0, 0, 0, 0, 1}, RSResult::None}, + {{6, 7, 8, 9, 10}, {0, 0, 0, 1, 0}, RSResult::None}, + {{std::nullopt, std::nullopt}, {1, 0}, RSResult::All}, + {{std::nullopt, std::nullopt}, {1, 1}, RSResult::None}, + {{1, 2, 3, 4}, {1, 1, 1, 1}, RSResult::None}, + }; + + auto col_type = 
makeNullable(std::make_shared()); + auto minmax_index = std::make_shared(*col_type); + for (const auto & c : cases) + { + ASSERT_EQ(c.column_data.size(), c.del_mark.size()); + auto col_data = createColumn>(c.column_data).column; + auto del_mark_col = createColumn(c.del_mark).column; + minmax_index->addPack(*col_data, static_cast *>(del_mark_col.get())); + } + + auto actual_results = minmax_index->checkIsNull(0, cases.size()); + for (size_t i = 0; i < cases.size(); ++i) + { + const auto & c = cases[i]; + ASSERT_EQ(actual_results[i], c.result) << fmt::format( + "i={} actual={} expected={}", + i, + magic_enum::enum_name(actual_results[i]), + magic_enum::enum_name(c.result)); + } } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp index f635e6cc390..bd98c609ef3 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp @@ -16,23 +16,23 @@ #include #include -namespace DB +namespace DB::DM::tests { -namespace DM -{ -namespace tests -{ -TEST(HandleRange_test, Redact) + +TEST(HandleRangeTest, Redact) { HandleRange range(20, 400); - Redact::setRedactLog(false); + Redact::setRedactLog(RedactMode::Disable); EXPECT_EQ(range.toDebugString(), "[20,400)"); - Redact::setRedactLog(true); + Redact::setRedactLog(RedactMode::Enable); EXPECT_EQ(range.toDebugString(), "[?,?)"); - Redact::setRedactLog(false); // restore flags + Redact::setRedactLog(RedactMode::Marker); + EXPECT_EQ(range.toDebugString(), "[‹20›,‹400›)"); + + Redact::setRedactLog(RedactMode::Disable); // restore flags } namespace @@ -53,7 +53,7 @@ std::shared_ptr genTestRegionRangeKeys() } } // namespace -TEST(RowKeyRange_test, Basic) +TEST(HandleRangeTest, Basic) { RowKeyRange all_range = RowKeyRange::newAll(true, 3); EXPECT_TRUE(all_range.isStartInfinite()); @@ -63,20 +63,23 @@ TEST(RowKeyRange_test, Basic) EXPECT_TRUE(none_range.none()); } -TEST(RowKeyRange_test, 
RedactRangeFromHandle) +TEST(HandleRangeTest, RedactRangeFromHandle) { RowKeyRange range = RowKeyRange::fromHandleRange(HandleRange{20, 400}); - Redact::setRedactLog(false); + Redact::setRedactLog(RedactMode::Disable); EXPECT_EQ(range.toDebugString(), "[20,400)"); - Redact::setRedactLog(true); + Redact::setRedactLog(RedactMode::Enable); EXPECT_EQ(range.toDebugString(), "[?,?)"); - Redact::setRedactLog(false); // restore flags + Redact::setRedactLog(RedactMode::Marker); + EXPECT_EQ(range.toDebugString(), "[‹20›,‹400›)"); + + Redact::setRedactLog(RedactMode::Disable); // restore flags } -TEST(RowKeyRange_test, RedactRangeFromCommonHandle) +TEST(HandleRangeTest, RedactRangeFromCommonHandle) { auto region_range = genTestRegionRangeKeys(); TableID table_id = 49; @@ -85,18 +88,24 @@ TEST(RowKeyRange_test, RedactRangeFromCommonHandle) RowKeyRange none_range = RowKeyRange::newNone(true, 3); // print some values - Redact::setRedactLog(false); - EXPECT_NE(range.toDebugString(), "[?,?)"); - EXPECT_NE(all_range.toDebugString(), "[?,?)"); - EXPECT_NE(none_range.toDebugString(), "[?,?)"); + Redact::setRedactLog(RedactMode::Disable); + EXPECT_EQ(range.toDebugString(), "[02066161610206616263,02066262620206616263)"); + EXPECT_EQ(all_range.toDebugString(), "[01,FA)"); + EXPECT_EQ(none_range.toDebugString(), "[FA,01)"); // print placeholder(?) 
instead of values - Redact::setRedactLog(true); + Redact::setRedactLog(RedactMode::Enable); EXPECT_EQ(range.toDebugString(), "[?,?)"); EXPECT_EQ(all_range.toDebugString(), "[?,?)"); EXPECT_EQ(none_range.toDebugString(), "[?,?)"); - Redact::setRedactLog(false); // restore flags + // print values with marker + Redact::setRedactLog(RedactMode::Marker); + EXPECT_EQ(range.toDebugString(), "[‹02066161610206616263›,‹02066262620206616263›)"); + EXPECT_EQ(all_range.toDebugString(), "[‹01›,‹FA›)"); + EXPECT_EQ(none_range.toDebugString(), "[‹FA›,‹01›)"); + + Redact::setRedactLog(RedactMode::Disable); // restore flags } TEST(RowKey, ToNextKeyIntHandle) @@ -150,7 +159,7 @@ TEST(RowKey, ToNextKeyCommonHandle) TEST(RowKey, NextIntHandleCompare) { auto int_max = RowKeyValue::INT_HANDLE_MAX_KEY; - auto int_max_i64 = RowKeyValue::fromHandle(Handle(std::numeric_limits::max())); + auto int_max_i64 = RowKeyValue::fromHandle(static_cast(std::numeric_limits::max())); EXPECT_GT(int_max.toRowKeyValueRef(), int_max_i64.toRowKeyValueRef()); @@ -167,9 +176,9 @@ TEST(RowKey, NextIntHandleCompare) TEST(RowKey, NextIntHandleMinMax) { - auto v0 = RowKeyValue::fromHandle(Handle(1178400)); + auto v0 = RowKeyValue::fromHandle(static_cast(1178400)); auto v0_next = v0.toNext(); - auto v1 = RowKeyValue::fromHandle(Handle(1178401)); + auto v1 = RowKeyValue::fromHandle(static_cast(1178401)); EXPECT_EQ(v0, std::min(v0, v1)); EXPECT_EQ(v0, std::min(v0, v0_next)); @@ -178,6 +187,4 @@ TEST(RowKey, NextIntHandleMinMax) EXPECT_EQ(v1, std::max(v0, v0_next)); } -} // namespace tests -} // namespace DM -} // namespace DB +} // namespace DB::DM::tests diff --git a/dbms/src/Storages/KVStore/FFI/JointThreadAllocInfo.cpp b/dbms/src/Storages/KVStore/FFI/JointThreadAllocInfo.cpp index b1fd26eb16b..2bef3a5a173 100644 --- a/dbms/src/Storages/KVStore/FFI/JointThreadAllocInfo.cpp +++ b/dbms/src/Storages/KVStore/FFI/JointThreadAllocInfo.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -47,6 
+48,7 @@ void JointThreadInfoJeallocMap::recordThreadAllocInfo() { recordThreadAllocInfoForProxy(); recordThreadAllocInfoForStorage(); + recordClassdAlloc(); } JointThreadInfoJeallocMap::~JointThreadInfoJeallocMap() @@ -269,4 +271,13 @@ void JointThreadInfoJeallocMap::accessStorageMap(std::function findSplitKeys(uint64_t splits_count) const override; void seek(BaseBuffView && view) const override; @@ -147,20 +147,27 @@ class MultiSSTReader : public SSTReader } size_t getSplitId() const override { return split_id; } - // Switch to next mono reader if current is drained, + // Switch to next mono reader if current SST is drained, // and we have a next sst file to read. - void maybeNextReader() const + void maybeNextReader() { - if (!mono->remained()) + if (likely(mono->remained())) + return; + + sst_idx++; + if (sst_idx < args.size()) { - current++; - if (current < args.size()) - { - // We don't drop if mono is the last instance for safety, - // and it will be dropped as MultiSSTReader is dropped. - LOG_INFO(log, "Open sst file {}", buffToStrView(args[current].path)); - mono = initer(proxy_helper, args[current], range, split_id); - } + // We don't drop if mono is the last instance for safety, + // and it will be dropped as MultiSSTReader is dropped. 
+ LOG_INFO( + log, + "Open sst file {}, range={} sst_idx={} sst_tot={} split_id={}", + buffToStrView(args[sst_idx].path), + range->toDebugString(), + sst_idx, + args.size(), + split_id); + mono = initer(proxy_helper, args[sst_idx], range, split_id); } } @@ -177,18 +184,19 @@ class MultiSSTReader : public SSTReader , type(type_) , initer(initer_) , args(args_) - , current(0) + , sst_idx(0) , range(range_) , split_id(split_id_) { assert(args.size() > 0); LOG_INFO( log, - "Open sst file first {} range {} split_id={}", - buffToStrView(args[current].path), + "Open sst file first {}, range={} sst_tot={} split_id={}", + buffToStrView(args[sst_idx].path), range->toDebugString(), + args.size(), split_id); - mono = initer(proxy_helper, args[current], range, split_id); + mono = initer(proxy_helper, args[sst_idx], range, split_id); } ~MultiSSTReader() override @@ -202,12 +210,12 @@ class MultiSSTReader : public SSTReader /// The instance is ill-formed if the size of `args` is zero. mutable std::unique_ptr mono; const TiFlashRaftProxyHelper * proxy_helper; - ColumnFamilyType type; + const ColumnFamilyType type; Initer initer; std::vector args; - mutable size_t current; + size_t sst_idx; RegionRangeFilter range; - size_t split_id; + const size_t split_id; }; } // namespace DB diff --git a/dbms/src/Storages/KVStore/MultiRaft/PrehandleSnapshot.cpp b/dbms/src/Storages/KVStore/MultiRaft/PrehandleSnapshot.cpp index ea3aa4fc10d..f832b19866c 100644 --- a/dbms/src/Storages/KVStore/MultiRaft/PrehandleSnapshot.cpp +++ b/dbms/src/Storages/KVStore/MultiRaft/PrehandleSnapshot.cpp @@ -84,7 +84,7 @@ void PreHandlingTrace::waitForSubtaskResources(uint64_t region_id, size_t parall { LOG_DEBUG( log, - "Prehandle resource meet, limit={}, current={}, region_id={}", + "Prehandle resource meet, limit={} current={} region_id={}", parallel_subtask_limit, ongoing_prehandle_subtask_count.load(), region_id); @@ -139,6 +139,7 @@ static inline std::tuple executeTransform 
trace.releaseSubtaskResources(region_id, split_id); CurrentMetrics::sub(CurrentMetrics::RaftNumPrehandlingSubTasks); }); + Stopwatch sw; LOG_INFO( log, "Add prehandle task split_id={} limit={}", @@ -200,6 +201,10 @@ static inline std::tuple executeTransform stream->cancel(); res = ReadFromStreamResult{.error = abort_reason.value(), .extra_msg = "", .region = new_region}; } + auto keys_per_second = (sst_stream->getProcessKeys().write_cf + sst_stream->getProcessKeys().lock_cf + + sst_stream->getProcessKeys().write_cf) + * 1.0 / sw.elapsedSeconds(); + GET_METRIC(tiflash_raft_command_throughput, type_prehandle_snapshot).Observe(keys_per_second); return std::make_pair( std::move(res), PrehandleResult{ @@ -254,6 +259,7 @@ PrehandleResult KVStore::preHandleSnapshotToFiles( std::optional deadline_index, TMTContext & tmt) { + GET_METRIC(tiflash_raft_raft_events_count, type_prehandle).Increment(); new_region->beforePrehandleSnapshot(new_region->id(), deadline_index); ongoing_prehandle_task_count.fetch_add(1); @@ -392,8 +398,7 @@ static inline std::pair, size_t> getSplitKey( LOG_INFO( log, "getSplitKey result {}, total_concurrency={} ongoing={} total_split_parts={} split_keys={} " - "region_range={} approx_bytes={} " - "region_id={}", + "region_range={} approx_bytes={} region_id={}", fmt_buf.toString(), total_concurrency, ongoing_count, @@ -442,7 +447,7 @@ static void runInParallel( = executeTransform(log, prehandle_ctx, part_new_region, part_sst_stream); LOG_INFO( log, - "Finished extra parallel prehandle task limit {} write_cf={} lock_cf={} default_cf={} dmfiles={} error={}, " + "Finished extra parallel prehandle task limit {} write_cf={} lock_cf={} default_cf={} dmfiles={} error={} " "split_id={} region_id={}", limit_tag, part_prehandle_result.stats.write_cf_keys, @@ -470,8 +475,7 @@ static void runInParallel( LOG_INFO( log, "Parallel prehandling error {}" - " write_cf_off={}" - " split_id={} region_id={}", + " write_cf_off={} split_id={} region_id={}", e.message(), 
processed_keys.write_cf, extra_id, @@ -503,10 +507,11 @@ void executeParallelTransform( split_key_count); LOG_INFO( log, - "Parallel prehandling for single big region, range={}, split keys={}, region_id={}", + "Parallel prehandling for single big region, range={} split_keys={} region_id={} snaps={}", new_region->getRange()->toDebugString(), split_key_count, - new_region->id()); + new_region->id(), + snaps.len); Stopwatch watch; // Make sure the queue is bigger than `split_key_count`, otherwise `addTask` may fail. auto async_tasks = SingleSnapshotAsyncTasks(split_key_count, split_key_count, split_key_count + 5); @@ -544,9 +549,8 @@ void executeParallelTransform( auto [head_result, head_prehandle_result] = executeTransform(log, prehandle_ctx, new_region, sst_stream); LOG_INFO( log, - "Finished extra parallel prehandle task limit={} write_cf {} lock_cf={} default_cf={} dmfiles={} " - "error={}, split_id={}, " - "region_id={}", + "Finished extra parallel prehandle task, limit={} write_cf={} lock_cf={} default_cf={} dmfiles={} " + "error={} split_id={} region_id={}", sst_stream->getSoftLimit()->toDebugString(), head_prehandle_result.stats.write_cf_keys, head_prehandle_result.stats.lock_cf_keys, @@ -708,9 +712,10 @@ PrehandleResult KVStore::preHandleSSTsToDTFiles( { LOG_INFO( log, - "Single threaded prehandling for single region, range={} region_id={}", + "Single threaded prehandling for single region, range={} region_id={} snaps={}", new_region->getRange()->toDebugString(), - new_region->id()); + new_region->id(), + snaps.len); std::tie(result, prehandle_result) = executeTransform(log, prehandle_ctx, new_region, sst_stream); } else diff --git a/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp b/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp index 2afa8ead3cd..e3ddb37da1b 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_new_kvstore.cpp @@ -20,6 +20,11 @@ #include #include #include +#include 
+#include +#include +#include +#include #include #include #include diff --git a/dbms/src/Storages/KVStore/tests/gtest_tikv_keyvalue.cpp b/dbms/src/Storages/KVStore/tests/gtest_tikv_keyvalue.cpp index dd9ca6f69aa..5f051fa533c 100644 --- a/dbms/src/Storages/KVStore/tests/gtest_tikv_keyvalue.cpp +++ b/dbms/src/Storages/KVStore/tests/gtest_tikv_keyvalue.cpp @@ -15,10 +15,9 @@ #include #include #include +#include #include -#include "region_helper.h" - namespace DB { TiKVValue encode_lock_cf_value( @@ -467,19 +466,29 @@ try auto raw_pk1 = RecordKVFormat::getRawTiDBPK(*raw_keys.first); auto raw_pk2 = RecordKVFormat::getRawTiDBPK(*raw_keys.second); - Redact::setRedactLog(false); + Redact::setRedactLog(RedactMode::Disable); // These will print the value - EXPECT_NE(raw_pk1.toDebugString(), "?"); - EXPECT_NE(raw_pk2.toDebugString(), "?"); - EXPECT_NE(RecordKVFormat::DecodedTiKVKeyRangeToDebugString(raw_keys), "[?, ?)"); + EXPECT_EQ(raw_pk1.toDebugString(), "02066161610206616263"); + EXPECT_EQ(raw_pk2.toDebugString(), "02066262620206616263"); + EXPECT_EQ( + RecordKVFormat::DecodedTiKVKeyRangeToDebugString(raw_keys), + "[02066161610206616263, 02066262620206616263)"); - Redact::setRedactLog(true); + Redact::setRedactLog(RedactMode::Enable); // These will print '?' 
instead of value EXPECT_EQ(raw_pk1.toDebugString(), "?"); EXPECT_EQ(raw_pk2.toDebugString(), "?"); EXPECT_EQ(RecordKVFormat::DecodedTiKVKeyRangeToDebugString(raw_keys), "[?, ?)"); - Redact::setRedactLog(false); // restore flags + // print values with marker + Redact::setRedactLog(RedactMode::Marker); + EXPECT_EQ(raw_pk1.toDebugString(), "‹02066161610206616263›"); + EXPECT_EQ(raw_pk2.toDebugString(), "‹02066262620206616263›"); + EXPECT_EQ( + RecordKVFormat::DecodedTiKVKeyRangeToDebugString(raw_keys), + "[‹02066161610206616263›, ‹02066262620206616263›)"); + + Redact::setRedactLog(RedactMode::Disable); // restore flags } CATCH diff --git a/dbms/src/Storages/KVStore/tests/region_kvstore_test.h b/dbms/src/Storages/KVStore/tests/region_kvstore_test.h index 9f49ef724e9..67342b033a3 100644 --- a/dbms/src/Storages/KVStore/tests/region_kvstore_test.h +++ b/dbms/src/Storages/KVStore/tests/region_kvstore_test.h @@ -62,7 +62,7 @@ inline void validateSSTGeneration( size_t split_id) -> std::unique_ptr { auto parsed_kind = MockSSTGenerator::parseSSTViewKind(buffToStrView(snap.path)); auto reader = std::make_unique(proxy_helper, snap, range, split_id); - assert(reader->sst_format_kind() == parsed_kind); + assert(reader->sstFormatKind() == parsed_kind); return reader; }; MultiSSTReader reader{ diff --git a/dbms/src/Parsers/tests/create_parser.cpp b/dbms/src/Storages/Page/PageStorageMemorySummary.h similarity index 52% rename from dbms/src/Parsers/tests/create_parser.cpp rename to dbms/src/Storages/Page/PageStorageMemorySummary.h index 29a7561dd58..9c1717fed48 100644 --- a/dbms/src/Parsers/tests/create_parser.cpp +++ b/dbms/src/Storages/Page/PageStorageMemorySummary.h @@ -12,25 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include -#include +#pragma once -#include +#include - -int main(int, char **) +namespace DB::PS { - using namespace DB; - - std::string input - = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = Log"; - ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0); - - formatAST(*ast, std::cerr); - std::cerr << std::endl; +struct PageStorageMemorySummary +{ + static inline std::atomic_int64_t uni_page_id_bytes{0}; + static inline std::atomic_int64_t versioned_entry_or_delete_bytes{0}; + static inline std::atomic_int64_t versioned_entry_or_delete_count{0}; +}; - return 0; -} +} // namespace DB::PS \ No newline at end of file diff --git a/dbms/src/Storages/Page/V2/tests/gtest_page_map_version_set.cpp b/dbms/src/Storages/Page/V2/tests/gtest_page_map_version_set.cpp index e9b7a139014..e2d0d9672f7 100644 --- a/dbms/src/Storages/Page/V2/tests/gtest_page_map_version_set.cpp +++ b/dbms/src/Storages/Page/V2/tests/gtest_page_map_version_set.cpp @@ -786,7 +786,6 @@ TYPED_TEST_P(PageMapVersionSetTest, LiveFiles) } auto s3 = versions.getSnapshot("", nullptr); s3.reset(); // do compact on version-list, and - //std::cerr << "s3 reseted." << std::endl; auto [livefiles, live_normal_pages] = versions.listAllLiveFiles(versions.acquireForLock()); ASSERT_EQ(livefiles.size(), 4UL) << liveFilesToString(livefiles); ASSERT_EQ(livefiles.count(std::make_pair(1, 0)), 1UL); // hold by s1 @@ -800,7 +799,6 @@ TYPED_TEST_P(PageMapVersionSetTest, LiveFiles) EXPECT_GT(live_normal_pages.count(3), 0UL); s2.reset(); - //std::cerr << "s2 reseted." 
<< std::endl; std::tie(livefiles, live_normal_pages) = versions.listAllLiveFiles(versions.acquireForLock()); ASSERT_EQ(livefiles.size(), 3UL) << liveFilesToString(livefiles); ASSERT_EQ(livefiles.count(std::make_pair(1, 0)), 1UL); // hold by s1 @@ -812,7 +810,6 @@ TYPED_TEST_P(PageMapVersionSetTest, LiveFiles) EXPECT_GT(live_normal_pages.count(2), 0UL); s1.reset(); - //std::cerr << "s1 reseted." << std::endl; std::tie(livefiles, live_normal_pages) = versions.listAllLiveFiles(versions.acquireForLock()); ASSERT_EQ(livefiles.size(), 2UL) << liveFilesToString(livefiles); ASSERT_EQ(livefiles.count(std::make_pair(2, 0)), 1UL); // hold by current diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index f6fb9fb8907..5103e327ddc 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -222,31 +222,47 @@ struct EntryOrDelete MultiVersionRefCount being_ref_count; std::optional entry; - static EntryOrDelete newDelete() + EntryOrDelete(const EntryOrDelete & other) + : being_ref_count(other.being_ref_count) + , entry(other.entry) { - return EntryOrDelete{ - .entry = std::nullopt, - }; - }; - static EntryOrDelete newNormalEntry(const PageEntryV3 & entry) + PageStorageMemorySummary::versioned_entry_or_delete_count.fetch_add(1); + if (entry) + PageStorageMemorySummary::versioned_entry_or_delete_bytes.fetch_add(sizeof(PageEntryV3)); + } + EntryOrDelete() { PageStorageMemorySummary::versioned_entry_or_delete_count.fetch_add(1); } + EntryOrDelete(std::optional entry_) + : entry(std::move(entry_)) + { + PageStorageMemorySummary::versioned_entry_or_delete_count.fetch_add(1); + if (entry) + PageStorageMemorySummary::versioned_entry_or_delete_bytes.fetch_add(sizeof(PageEntryV3)); + } + EntryOrDelete(MultiVersionRefCount being_ref_count_, std::optional entry_) + : being_ref_count(being_ref_count_) + , entry(std::move(entry_)) { - return EntryOrDelete{ - .entry = entry, - }; + 
PageStorageMemorySummary::versioned_entry_or_delete_count.fetch_add(1); + if (entry) + PageStorageMemorySummary::versioned_entry_or_delete_bytes.fetch_add(sizeof(PageEntryV3)); } + ~EntryOrDelete() + { + PageStorageMemorySummary::versioned_entry_or_delete_count.fetch_sub(1); + if (entry) + PageStorageMemorySummary::versioned_entry_or_delete_bytes.fetch_sub(sizeof(PageEntryV3)); + } + + static EntryOrDelete newDelete() { return EntryOrDelete(std::nullopt); }; + static EntryOrDelete newNormalEntry(const PageEntryV3 & entry) { return EntryOrDelete(entry); } static EntryOrDelete newReplacingEntry(const EntryOrDelete & ori_entry, const PageEntryV3 & entry) { - return EntryOrDelete{ - .being_ref_count = ori_entry.being_ref_count, - .entry = entry, - }; + return EntryOrDelete(ori_entry.being_ref_count, entry); } static EntryOrDelete newFromRestored(PageEntryV3 entry, const PageVersion & ver, Int64 being_ref_count) { - auto result = EntryOrDelete{ - .entry = entry, - }; + auto result = EntryOrDelete(std::move(entry)); result.being_ref_count.restoreFrom(ver, being_ref_count); return result; } diff --git a/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h b/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h index aec9717a15f..54251f0a795 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h +++ b/dbms/src/Storages/Page/V3/PageDirectory/PageIdTrait.h @@ -29,6 +29,7 @@ struct PageIdTrait static inline PageIdU64 getU64ID(const PageId & page_id) { return page_id.low; } static inline Prefix getPrefix(const PageId & page_id) { return page_id.high; } static inline PageIdU64 getPageMapKey(const PageId & page_id) { return page_id.low; } + static inline size_t getPageIDSize(const PageId & page_id) { return sizeof(page_id); } }; } // namespace u128 namespace universal @@ -45,6 +46,8 @@ struct PageIdTrait static Prefix getPrefix(const PageId & page_id); static inline PageId getPageMapKey(const PageId & page_id) { return page_id; } + + static inline size_t 
getPageIDSize(const PageId & page_id) { return page_id.size(); } }; } // namespace universal } // namespace DB::PS::V3 diff --git a/dbms/src/Storages/Page/V3/PageEntry.h b/dbms/src/Storages/Page/V3/PageEntry.h index c3fad616f70..c2c530f5203 100644 --- a/dbms/src/Storages/Page/V3/PageEntry.h +++ b/dbms/src/Storages/Page/V3/PageEntry.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include #include diff --git a/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h b/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h index 9e757732805..0d995dd9d01 100644 --- a/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h +++ b/dbms/src/Storages/Page/V3/Universal/UniversalPageId.h @@ -18,26 +18,49 @@ #include #include #include +#include namespace DB { class UniversalPageId final { public: - UniversalPageId() = default; + UniversalPageId() { PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id.size()); } + UniversalPageId(const UniversalPageId & other) + : id(other.id) + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id.size()); + } UniversalPageId(String id_) // NOLINT(google-explicit-constructor) : id(std::move(id_)) - {} + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id.size()); + } UniversalPageId(const char * id_) // NOLINT(google-explicit-constructor) : id(id_) - {} + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id.size()); + } UniversalPageId(const char * id_, size_t sz_) : id(id_, sz_) - {} + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id.size()); + } + + ~UniversalPageId() { PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_sub(id.size()); } UniversalPageId & operator=(String && id_) noexcept { + if (id.size() == id_.size()) {} + else if (id.size() > id_.size()) + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_sub(id.size() - id_.size()); + } + else + { + PS::PageStorageMemorySummary::uni_page_id_bytes.fetch_add(id_.size() - id.size()); + } id.swap(id_); return 
*this; } diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 5e017055270..387e1072e35 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -39,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +55,6 @@ #include #include #include -#include #include @@ -555,6 +552,8 @@ WriteResult StorageDeltaMerge::write( return store->write(global_context, settings, block, applied_status); } +namespace +{ std::unordered_set parseSegmentSet(const ASTPtr & ast) { if (!ast) @@ -669,14 +668,17 @@ void checkStartTs(UInt64 start_ts, const Context & context, const String & req_i } } -DM::RowKeyRanges StorageDeltaMerge::parseMvccQueryInfo( +DM::RowKeyRanges parseMvccQueryInfo( const DB::MvccQueryInfo & mvcc_query_info, + KeyspaceID keyspace_id, + TableID table_id, + bool is_common_handle, + size_t rowkey_column_size, unsigned num_streams, const Context & context, const String & req_id, const LoggerPtr & tracing_logger) { - auto keyspace_id = getTableInfo().getKeyspaceID(); checkStartTs(mvcc_query_info.start_ts, context, req_id, keyspace_id); FmtBuffer fmt_buf; @@ -709,7 +711,7 @@ DM::RowKeyRanges StorageDeltaMerge::parseMvccQueryInfo( auto ranges = getQueryRanges( mvcc_query_info.regions_query_info, - tidb_table_info.id, + table_id, is_common_handle, rowkey_column_size, num_streams, @@ -729,206 +731,22 @@ DM::RowKeyRanges StorageDeltaMerge::parseMvccQueryInfo( return ranges; } -DM::RSOperatorPtr StorageDeltaMerge::buildRSOperator( - const std::unique_ptr & dag_query, - const ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger) -{ - RUNTIME_CHECK(dag_query != nullptr); - // build rough set operator - DM::RSOperatorPtr rs_operator = DM::EMPTY_RS_OPERATOR; - const bool enable_rs_filter = context.getSettingsRef().dt_enable_rough_set_filter; 
- if (likely(enable_rs_filter)) - { - /// Query from TiDB / TiSpark - auto create_attr_by_column_id = [this](ColumnID column_id) -> Attr { - const ColumnDefines & defines = this->getAndMaybeInitStore()->getTableColumns(); - auto iter = std::find_if(defines.begin(), defines.end(), [column_id](const ColumnDefine & d) -> bool { - return d.id == column_id; - }); - if (iter != defines.end()) - return Attr{.col_name = iter->name, .col_id = iter->id, .type = iter->type}; - // Maybe throw an exception? Or check if `type` is nullptr before creating filter? - return Attr{.col_name = "", .col_id = column_id, .type = DataTypePtr{}}; - }; - rs_operator - = FilterParser::parseDAGQuery(*dag_query, columns_to_read, std::move(create_attr_by_column_id), log); - if (likely(rs_operator != DM::EMPTY_RS_OPERATOR)) - LOG_DEBUG(tracing_logger, "Rough set filter: {}", rs_operator->toDebugString()); - } - else - LOG_DEBUG(tracing_logger, "Rough set filter is disabled."); - - return rs_operator; -} - -DM::PushDownFilterPtr StorageDeltaMerge::buildPushDownFilter( - const RSOperatorPtr & rs_operator, - const ColumnInfos & table_scan_column_info, - const google::protobuf::RepeatedPtrField & pushed_down_filters, - const ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger) -{ - if (pushed_down_filters.empty()) - { - LOG_DEBUG(tracing_logger, "Push down filter is empty"); - return std::make_shared(rs_operator); - } - std::unordered_map columns_to_read_map; - for (const auto & column : columns_to_read) - columns_to_read_map.emplace(column.id, column); - - // Get the columns of the filter, is a subset of columns_to_read - std::unordered_set filter_col_id_set; - for (const auto & expr : pushed_down_filters) - { - getColumnIDsFromExpr(expr, table_scan_column_info, filter_col_id_set); - } - auto filter_columns = std::make_shared(); - filter_columns->reserve(filter_col_id_set.size()); - for (const auto & cid : filter_col_id_set) - { - RUNTIME_CHECK_MSG( - 
columns_to_read_map.contains(cid), - "Filter ColumnID({}) not found in columns_to_read_map", - cid); - filter_columns->emplace_back(columns_to_read_map.at(cid)); - } - - // The source_columns_of_analyzer should be the same as the size of table_scan_column_info - // The columns_to_read is a subset of table_scan_column_info, when there are generated columns and extra table id column. - NamesAndTypes source_columns_of_analyzer; - source_columns_of_analyzer.reserve(table_scan_column_info.size()); - for (size_t i = 0; i < table_scan_column_info.size(); ++i) - { - auto const & ci = table_scan_column_info[i]; - const auto cid = ci.id; - if (ci.hasGeneratedColumnFlag()) - { - const auto & col_name = GeneratedColumnPlaceholderBlockInputStream::getColumnName(i); - const auto & data_type = getDataTypeByColumnInfoForComputingLayer(ci); - source_columns_of_analyzer.emplace_back(col_name, data_type); - continue; - } - if (cid == EXTRA_TABLE_ID_COLUMN_ID) - { - source_columns_of_analyzer.emplace_back(EXTRA_TABLE_ID_COLUMN_NAME, EXTRA_TABLE_ID_COLUMN_TYPE); - continue; - } - RUNTIME_CHECK_MSG(columns_to_read_map.contains(cid), "ColumnID({}) not found in columns_to_read_map", cid); - source_columns_of_analyzer.emplace_back(columns_to_read_map.at(cid).name, columns_to_read_map.at(cid).type); - } - std::unique_ptr analyzer - = std::make_unique(source_columns_of_analyzer, context); - - // Build the extra cast - ExpressionActionsPtr extra_cast = nullptr; - // need_cast_column should be the same size as table_scan_column_info and source_columns_of_analyzer - std::vector may_need_add_cast_column; - may_need_add_cast_column.reserve(table_scan_column_info.size()); - for (const auto & col : table_scan_column_info) - may_need_add_cast_column.push_back( - !col.hasGeneratedColumnFlag() && filter_col_id_set.contains(col.id) && col.id != -1); - ExpressionActionsChain chain; - auto & step = analyzer->initAndGetLastStep(chain); - auto & actions = step.actions; - if (auto [has_cast, casted_columns] 
- = analyzer->buildExtraCastsAfterTS(actions, may_need_add_cast_column, table_scan_column_info); - has_cast) - { - NamesWithAliases project_cols; - for (size_t i = 0; i < columns_to_read.size(); ++i) - { - if (filter_col_id_set.contains(columns_to_read[i].id)) - project_cols.emplace_back(casted_columns[i], columns_to_read[i].name); - } - actions->add(ExpressionAction::project(project_cols)); - - for (const auto & col : *filter_columns) - step.required_output.push_back(col.name); - - extra_cast = chain.getLastActions(); - chain.finalize(); - chain.clear(); - LOG_DEBUG(tracing_logger, "Extra cast for filter columns: {}", extra_cast->dumpActions()); - } - - // build filter expression actions - auto [before_where, filter_column_name, project_after_where] - = ::DB::buildPushDownFilter(pushed_down_filters, *analyzer); - LOG_DEBUG(tracing_logger, "Push down filter: {}", before_where->dumpActions()); - - // record current column defines - auto columns_after_cast = std::make_shared(); - if (extra_cast != nullptr) - { - columns_after_cast->reserve(columns_to_read.size()); - const auto & current_names_and_types = analyzer->getCurrentInputColumns(); - for (size_t i = 0; i < table_scan_column_info.size(); ++i) - { - if (table_scan_column_info[i].hasGeneratedColumnFlag() - || table_scan_column_info[i].id == EXTRA_TABLE_ID_COLUMN_ID) - continue; - auto col = columns_to_read_map.at(table_scan_column_info[i].id); - RUNTIME_CHECK_MSG( - col.name == current_names_and_types[i].name, - "Column name mismatch, expect: {}, actual: {}", - col.name, - current_names_and_types[i].name); - columns_after_cast->push_back(col); - columns_after_cast->back().type = current_names_and_types[i].type; - } - } - - return std::make_shared( - rs_operator, - before_where, - project_after_where, - filter_columns, - filter_column_name, - extra_cast, - columns_after_cast); -} - -DM::PushDownFilterPtr StorageDeltaMerge::parsePushDownFilter( +RuntimeFilteList parseRuntimeFilterList( const SelectQueryInfo & 
query_info, - const ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger) + const Context & db_context, + const LoggerPtr & log) { - const auto & dag_query = query_info.dag_query; - if (unlikely(dag_query == nullptr)) - return EMPTY_FILTER; - - // build rough set operator - const DM::RSOperatorPtr rs_operator = buildRSOperator(dag_query, columns_to_read, context, tracing_logger); - // build push down filter - const auto & columns_to_read_info = dag_query->source_columns; - const auto & pushed_down_filters = dag_query->pushed_down_filters; - if (unlikely(context.getSettingsRef().force_push_down_all_filters_to_scan) && !dag_query->filters.empty()) + if (db_context.getDAGContext() == nullptr || query_info.dag_query == nullptr) { - google::protobuf::RepeatedPtrField merged_filters{ - pushed_down_filters.begin(), - pushed_down_filters.end()}; - merged_filters.MergeFrom(dag_query->filters); - return buildPushDownFilter( - rs_operator, - columns_to_read_info, - merged_filters, - columns_to_read, - context, - tracing_logger); + return std::vector{}; } - return buildPushDownFilter( - rs_operator, - columns_to_read_info, - pushed_down_filters, - columns_to_read, - context, - tracing_logger); + auto runtime_filter_list = db_context.getDAGContext()->runtime_filter_mgr.getLocalRuntimeFilterByIds( + query_info.dag_query->runtime_filter_ids); + LOG_DEBUG(log, "build runtime filter in local stream, list size:{}", runtime_filter_list.size()); + return runtime_filter_list; } +} // namespace + BlockInputStreams StorageDeltaMerge::read( const Names & column_names, @@ -965,12 +783,21 @@ BlockInputStreams StorageDeltaMerge::read( // Read with MVCC filtering RUNTIME_CHECK(query_info.mvcc_query_info != nullptr); const auto & mvcc_query_info = *query_info.mvcc_query_info; + const auto keyspace_id = getTableInfo().getKeyspaceID(); + auto ranges = parseMvccQueryInfo( + mvcc_query_info, + keyspace_id, + tidb_table_info.id, + is_common_handle, + 
rowkey_column_size, + num_streams, + context, + query_info.req_id, + tracing_logger); - auto ranges = parseMvccQueryInfo(mvcc_query_info, num_streams, context, query_info.req_id, tracing_logger); - - auto filter = parsePushDownFilter(query_info, columns_to_read, context, tracing_logger); + auto filter = PushDownFilter::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); - auto runtime_filter_list = parseRuntimeFilterList(query_info, context); + auto runtime_filter_list = parseRuntimeFilterList(query_info, context, log); const auto & scan_context = mvcc_query_info.scan_context; @@ -992,7 +819,6 @@ BlockInputStreams StorageDeltaMerge::read( extra_table_id_index, scan_context); - auto keyspace_id = getTableInfo().getKeyspaceID(); /// Ensure start_ts info after read. checkStartTs(mvcc_query_info.start_ts, context, query_info.req_id, keyspace_id); @@ -1001,20 +827,6 @@ BlockInputStreams StorageDeltaMerge::read( return streams; } -RuntimeFilteList StorageDeltaMerge::parseRuntimeFilterList( - const SelectQueryInfo & query_info, - const Context & db_context) const -{ - if (db_context.getDAGContext() == nullptr || query_info.dag_query == nullptr) - { - return std::vector{}; - } - auto runtime_filter_list = db_context.getDAGContext()->runtime_filter_mgr.getLocalRuntimeFilterByIds( - query_info.dag_query->runtime_filter_ids); - LOG_DEBUG(log, "build runtime filter in local stream, list size:{}", runtime_filter_list.size()); - return runtime_filter_list; -} - void StorageDeltaMerge::read( PipelineExecutorContext & exec_context_, PipelineExecGroupBuilder & group_builder, @@ -1054,12 +866,21 @@ void StorageDeltaMerge::read( // Read with MVCC filtering RUNTIME_CHECK(query_info.mvcc_query_info != nullptr); const auto & mvcc_query_info = *query_info.mvcc_query_info; + const auto keyspace_id = getTableInfo().getKeyspaceID(); + auto ranges = parseMvccQueryInfo( + mvcc_query_info, + keyspace_id, + tidb_table_info.id, + is_common_handle, + 
rowkey_column_size, + num_streams, + context, + query_info.req_id, + tracing_logger); - auto ranges = parseMvccQueryInfo(mvcc_query_info, num_streams, context, query_info.req_id, tracing_logger); - - auto filter = parsePushDownFilter(query_info, columns_to_read, context, tracing_logger); + auto filter = PushDownFilter::build(query_info, columns_to_read, store->getTableColumns(), context, tracing_logger); - auto runtime_filter_list = parseRuntimeFilterList(query_info, context); + auto runtime_filter_list = parseRuntimeFilterList(query_info, context, log); const auto & scan_context = mvcc_query_info.scan_context; @@ -1083,7 +904,6 @@ void StorageDeltaMerge::read( extra_table_id_index, scan_context); - auto keyspace_id = getTableInfo().getKeyspaceID(); /// Ensure start_ts info after read. checkStartTs(mvcc_query_info.start_ts, context, query_info.req_id, keyspace_id); @@ -1105,8 +925,18 @@ DM::Remote::DisaggPhysicalTableReadSnapshotPtr StorageDeltaMerge::writeNodeBuild const ASTSelectQuery & select_query = typeid_cast(*query_info.query); RUNTIME_CHECK(query_info.mvcc_query_info != nullptr); + const auto keyspace_id = getTableInfo().getKeyspaceID(); const auto & mvcc_query_info = *query_info.mvcc_query_info; - auto ranges = parseMvccQueryInfo(mvcc_query_info, num_streams, context, query_info.req_id, tracing_logger); + auto ranges = parseMvccQueryInfo( + mvcc_query_info, + keyspace_id, + tidb_table_info.id, + is_common_handle, + rowkey_column_size, + num_streams, + context, + query_info.req_id, + tracing_logger); auto read_segments = parseSegmentSet(select_query.segment_expression_list); auto snap = store->writeNodeBuildRemoteReadSnapshot( @@ -1120,7 +950,6 @@ DM::Remote::DisaggPhysicalTableReadSnapshotPtr StorageDeltaMerge::writeNodeBuild snap->column_defines = std::make_shared(columns_to_read); - auto keyspace_id = getTableInfo().getKeyspaceID(); // Ensure start_ts is valid after snapshot is built checkStartTs(mvcc_query_info.start_ts, context, query_info.req_id, 
keyspace_id); return snap; diff --git a/dbms/src/Storages/StorageDeltaMerge.h b/dbms/src/Storages/StorageDeltaMerge.h index 4abcfe4d34d..4f3a3651117 100644 --- a/dbms/src/Storages/StorageDeltaMerge.h +++ b/dbms/src/Storages/StorageDeltaMerge.h @@ -208,14 +208,6 @@ class StorageDeltaMerge DM::DMConfigurationOpt createChecksumConfig() const { return DM::DMChecksumConfig::fromDBContext(global_context); } - static DM::PushDownFilterPtr buildPushDownFilter( - const DM::RSOperatorPtr & rs_operator, - const ColumnInfos & table_scan_column_info, - const google::protobuf::RepeatedPtrField & pushed_down_filters, - const DM::ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger); - #ifndef DBMS_PUBLIC_GTEST protected: #endif @@ -252,27 +244,6 @@ class StorageDeltaMerge bool dataDirExist(); void shutdownImpl(); - DM::RSOperatorPtr buildRSOperator( - const std::unique_ptr & dag_query, - const DM::ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger); - /// Get filters from query to construct rough set operation and push down filters. 
- DM::PushDownFilterPtr parsePushDownFilter( - const SelectQueryInfo & query_info, - const DM::ColumnDefines & columns_to_read, - const Context & context, - const LoggerPtr & tracing_logger); - - DM::RowKeyRanges parseMvccQueryInfo( - const DB::MvccQueryInfo & mvcc_query_info, - unsigned num_streams, - const Context & context, - const String & req_id, - const LoggerPtr & tracing_logger); - - RuntimeFilteList parseRuntimeFilterList(const SelectQueryInfo & query_info, const Context & db_context) const; - #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/StorageDisaggregatedRemote.cpp b/dbms/src/Storages/StorageDisaggregatedRemote.cpp index 8346e4a9a1e..1ae0bc889e2 100644 --- a/dbms/src/Storages/StorageDisaggregatedRemote.cpp +++ b/dbms/src/Storages/StorageDisaggregatedRemote.cpp @@ -501,19 +501,8 @@ DM::RSOperatorPtr StorageDisaggregated::buildRSOperator( std::vector{}, 0, db_context.getTimezoneInfo()); - auto create_attr_by_column_id = [defines = columns_to_read](ColumnID column_id) -> DM::Attr { - auto iter = std::find_if(defines->begin(), defines->end(), [column_id](const DM::ColumnDefine & d) -> bool { - return d.id == column_id; - }); - if (iter != defines->end()) - return DM::Attr{.col_name = iter->name, .col_id = iter->id, .type = iter->type}; - return DM::Attr{.col_name = "", .col_id = column_id, .type = DataTypePtr{}}; - }; - auto rs_operator - = DM::FilterParser::parseDAGQuery(*dag_query, *columns_to_read, std::move(create_attr_by_column_id), log); - if (likely(rs_operator != DM::EMPTY_RS_OPERATOR)) - LOG_DEBUG(log, "Rough set filter: {}", rs_operator->toDebugString()); - return rs_operator; + + return DM::RSOperator::build(dag_query, *columns_to_read, *columns_to_read, enable_rs_filter, log); } std::variant StorageDisaggregated::packSegmentReadTasks( @@ -526,7 +515,7 @@ std::variant StorageDisagg const auto & executor_id = table_scan.getTableScanExecutorID(); auto rs_operator = buildRSOperator(db_context, column_defines); - auto 
push_down_filter = StorageDeltaMerge::buildPushDownFilter( + auto push_down_filter = DM::PushDownFilter::build( rs_operator, table_scan.getColumns(), table_scan.getPushedDownFilters(), diff --git a/dbms/src/Storages/tests/gtest_filter_parser.cpp b/dbms/src/Storages/tests/gtest_filter_parser.cpp index aa891dead50..c6de544299b 100644 --- a/dbms/src/Storages/tests/gtest_filter_parser.cpp +++ b/dbms/src/Storages/tests/gtest_filter_parser.cpp @@ -395,27 +395,16 @@ try EXPECT_EQ(rs_operator->name(), "and"); EXPECT_EQ( rs_operator->toDebugString(), - "{\"op\":\"and\",\"children\":[{\"op\":\"unsupported\",\"reason\":\"child of logical and is not " - "function\",\"content\":\"tp: ColumnRef val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: " - "8 flag: 4097 flen: 0 decimal: 0 collate: 0 " - "}\"},{\"op\":\"unsupported\",\"reason\":\"child of logical and is not " - "function\",\"content\":\"tp: Uint64 val: \"\\000\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 " - "flag: 4129 flen: 0 decimal: 0 collate: 0 }\"}]}"); + R"raw({"op":"and","children":[{"op":"unsupported","reason":"child of logical and is not function, expr.tp=ColumnRef"},{"op":"unsupported","reason":"child of logical and is not function, expr.tp=Uint64"}]})raw"); } - std::cout << " do query select * from default.t_111 where col_6 or 1 " << std::endl; { // Or between col and literal (not supported since Or only support when child is ColumnExpr) auto rs_operator = generateRsOperator(table_info_json, "select * from default.t_111 where col_2 or 1"); EXPECT_EQ(rs_operator->name(), "or"); EXPECT_EQ( rs_operator->toDebugString(), - "{\"op\":\"or\",\"children\":[{\"op\":\"unsupported\",\"reason\":\"child of logical operator is not " - "function\",\"content\":\"tp: ColumnRef val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: " - "8 flag: 4097 flen: 0 decimal: 0 collate: 0 " - "}\"},{\"op\":\"unsupported\",\"reason\":\"child of logical operator is not " - 
"function\",\"content\":\"tp: Uint64 val: \"\\000\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 " - "flag: 4129 flen: 0 decimal: 0 collate: 0 }\"}]}"); + R"raw({"op":"or","children":[{"op":"unsupported","reason":"child of logical operator is not function, child_type=ColumnRef"},{"op":"unsupported","reason":"child of logical operator is not function, child_type=Uint64"}]})raw"); } { diff --git a/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp b/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp index b26d4a1e080..cfab21463f1 100644 --- a/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp +++ b/dbms/src/Storages/tests/gtests_parse_push_down_filter.cpp @@ -118,13 +118,8 @@ DM::PushDownFilterPtr ParsePushDownFilterTest::generatePushDownFilter( auto rs_operator = DM::FilterParser::parseDAGQuery(*dag_query, columns_to_read, std::move(create_attr_by_column_id), log); - auto push_down_filter = StorageDeltaMerge::buildPushDownFilter( - rs_operator, - table_info.columns, - pushed_down_filters, - columns_to_read, - *ctx, - log); + auto push_down_filter + = DM::PushDownFilter::build(rs_operator, table_info.columns, pushed_down_filters, columns_to_read, *ctx, log); return push_down_filter; } @@ -648,12 +643,7 @@ try EXPECT_EQ(rs_operator->name(), "and"); EXPECT_EQ( rs_operator->toDebugString(), - "{\"op\":\"and\",\"children\":[{\"op\":\"unsupported\",\"reason\":\"child of logical and is not " - "function\",\"content\":\"tp: ColumnRef val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: " - "8 flag: 4097 flen: 0 decimal: 0 collate: 0 " - "}\"},{\"op\":\"unsupported\",\"reason\":\"child of logical and is not " - "function\",\"content\":\"tp: Uint64 val: \"\\000\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 " - "flag: 4129 flen: 0 decimal: 0 collate: 0 }\"}]}"); + R"raw({"op":"and","children":[{"op":"unsupported","reason":"child of logical and is not function, expr.tp=ColumnRef"},{"op":"unsupported","reason":"child of 
logical and is not function, expr.tp=Uint64"}]})raw"); Block before_where_block = Block{ {toVec("col_1", {"a", "b", "c", "test1", "d", "test1", "pingcap", "tiflash"}), @@ -668,7 +658,6 @@ try EXPECT_EQ(filter->filter_columns->size(), 1); } - std::cout << " do query select * from default.t_111 where col_2 or 1 " << std::endl; { // Or between col and literal (not supported since Or only support when child is ColumnExpr) auto filter = generatePushDownFilter( @@ -679,12 +668,7 @@ try EXPECT_EQ(rs_operator->name(), "or"); EXPECT_EQ( rs_operator->toDebugString(), - "{\"op\":\"or\",\"children\":[{\"op\":\"unsupported\",\"reason\":\"child of logical operator is not " - "function\",\"content\":\"tp: ColumnRef val: \"\\200\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: " - "8 flag: 4097 flen: 0 decimal: 0 collate: 0 " - "}\"},{\"op\":\"unsupported\",\"reason\":\"child of logical operator is not " - "function\",\"content\":\"tp: Uint64 val: \"\\000\\000\\000\\000\\000\\000\\000\\001\" field_type { tp: 1 " - "flag: 4129 flen: 0 decimal: 0 collate: 0 }\"}]}"); + R"raw({"op":"or","children":[{"op":"unsupported","reason":"child of logical operator is not function, child_type=ColumnRef"},{"op":"unsupported","reason":"child of logical operator is not function, child_type=Uint64"}]})raw"); Block before_where_block = Block{ {toVec("col_1", {"a", "b", "c", "test1", "d", "test1", "pingcap", "tiflash"}), diff --git a/format-diff.py b/format-diff.py index 4674d7e40c8..8f496894a65 100755 --- a/format-diff.py +++ b/format-diff.py @@ -28,6 +28,14 @@ def run_cmd(cmd, show_cmd=False): print("RUN CMD: {}".format(cmd)) return res +def try_find_clang_format(exec_path): + candidates = ['clang-format-15', 'clang-format'] + if exec_path is not None: + candidates.insert(0, exec_path) + for c in candidates: + if which(c) is not None: + return c + return candidates[-1] def main(): default_suffix = ['.cpp', '.h', '.cc', '.hpp'] @@ -39,12 +47,14 @@ def main(): help='suffix of files to format, 
split by space', default=' '.join(default_suffix)) parser.add_argument('--ignore_suffix', help='ignore files with suffix, split by space') - parser.add_argument( - '--diff_from', help='commit hash/branch to check git diff', default='HEAD') + parser.add_argument('--diff_from', + help='commit hash/branch to check git diff', default='HEAD') parser.add_argument('--check_formatted', help='exit -1 if NOT formatted', action='store_true') parser.add_argument('--dump_diff_files_to', help='dump diff file names to specific path', default=None) + parser.add_argument('--clang_format', + help='path to clang-format', default=None) args = parser.parse_args() default_suffix = args.suffix.strip().split(' ') if args.suffix else [] @@ -83,9 +93,7 @@ def main(): if files_to_format: print('Files to format:\n {}'.format('\n '.join(files_to_format))) - clang_format_cmd = 'clang-format-15' - if which(clang_format_cmd) is None: - clang_format_cmd = 'clang-format' + clang_format_cmd = try_find_clang_format(args.clang_format) for file in files_to_format: cmd = clang_format_cmd + ' -i {}'.format(file) if subprocess.Popen(cmd, shell=True, cwd=tiflash_repo_path).wait(): diff --git a/libs/libprocess_metrics/CMakeLists.txt b/libs/libprocess_metrics/CMakeLists.txt index 2d6ffdfc3be..3d4cb1f90ad 100644 --- a/libs/libprocess_metrics/CMakeLists.txt +++ b/libs/libprocess_metrics/CMakeLists.txt @@ -25,7 +25,7 @@ add_custom_command(OUTPUT ${_PROCESS_METRICS_LIBRARY} "${_PROCESS_METRICS_HEADERS}" "${_PROCESS_METRICS_SOURCE_DIR}/Cargo.lock" "${_PROCESS_METRICS_SOURCE_DIR}/Cargo.toml" - "${TiFlash_SOURCE_DIR}/rust-toolchain") + "${TiFlash_SOURCE_DIR}/rust-toolchain.toml") add_custom_target(_process_metrics ALL DEPENDS ${_PROCESS_METRICS_LIBRARY} JOB_POOL rust_job_pool) add_library(process_metrics STATIC IMPORTED GLOBAL) diff --git a/libs/libsymbolization/CMakeLists.txt b/libs/libsymbolization/CMakeLists.txt index 0961d53458c..be5057af726 100644 --- a/libs/libsymbolization/CMakeLists.txt +++ 
b/libs/libsymbolization/CMakeLists.txt @@ -31,7 +31,7 @@ add_custom_command(OUTPUT ${_SYMBOLIZATION_LIBRARY} "${_SYMBOLIZATION_HEADERS}" "${_SYMBOLIZATION_SOURCE_DIR}/Cargo.lock" "${_SYMBOLIZATION_SOURCE_DIR}/Cargo.toml" - "${TiFlash_SOURCE_DIR}/rust-toolchain") + "${TiFlash_SOURCE_DIR}/rust-toolchain.toml") add_custom_target(symbolization ALL DEPENDS ${_SYMBOLIZATION_LIBRARY}) add_library(libsymbolization STATIC IMPORTED GLOBAL) diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 44c664a11b0..067e2b6a752 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -52,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1716350266980, + "iteration": 1718272201438, "links": [], "panels": [ { @@ -8890,7 +8890,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Information about schema of column file, to learn the memory usage of schema", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -8898,13 +8898,13 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, + "h": 7, "w": 12, - "x": 12, + "x": 0, "y": 102 }, "hiddenSeries": false, - "id": 168, + "id": 291, "legend": { "avg": false, "current": false, @@ -8916,6 +8916,7 @@ }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true @@ -8932,42 +8933,19 @@ "targets": [ { "exemplar": true, - "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"current_size\"}) by (instance)", - "interval": "", - "legendFormat": "current_size", - "queryType": "randomWalk", - "refId": "A" - }, - { - "exemplar": true, - "expr": "sum(rate(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"hit_count\"}[1m])) by (instance)", - "hide": false, + "expr": 
"tiflash_memory_usage_by_class{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", "interval": "", - "legendFormat": "hit_count_ops", + "intervalFactor": 2, + "legendFormat": "{{instance}}-{{type}}", "refId": "B" - }, - { - "exemplar": true, - "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"still_used_when_evict\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "still_used_when_evict", - "refId": "C" - }, - { - "exemplar": true, - "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"miss_count\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "miss_count", - "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Schema of Column File", + "title": "Memory by class", "tooltip": { "shared": true, "sort": 0, @@ -8983,7 +8961,7 @@ }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -9021,7 +8999,7 @@ "h": 7, "w": 12, "x": 12, - "y": 110 + "y": 102 }, "hiddenSeries": false, "id": 289, @@ -9111,6 +9089,126 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "Information about schema of column file, to learn the memory usage of schema", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 109 + }, + "hiddenSeries": false, + "id": 168, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + 
"percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"current_size\"}) by (instance)", + "interval": "", + "legendFormat": "current_size", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"hit_count\"}[1m])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "hit_count_ops", + "refId": "B" + }, + { + "exemplar": true, + "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"still_used_when_evict\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "still_used_when_evict", + "refId": "C" + }, + { + "exemplar": true, + "expr": "max(tiflash_shared_block_schemas{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"miss_count\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "miss_count", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Schema of Column File", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } 
} ], "repeat": null, @@ -9754,7 +9852,7 @@ "h": 8, "w": 12, "x": 0, - "y": 104 + "y": 8 }, "hiddenSeries": false, "id": 128, @@ -9897,7 +9995,7 @@ "h": 8, "w": 12, "x": 12, - "y": 104 + "y": 8 }, "hiddenSeries": false, "id": 129, @@ -10014,7 +10112,7 @@ "h": 8, "w": 12, "x": 0, - "y": 112 + "y": 16 }, "heatmap": {}, "hideZeroBuckets": true, @@ -10076,7 +10174,7 @@ "h": 8, "w": 12, "x": 12, - "y": 112 + "y": 16 }, "hiddenSeries": false, "id": 158, @@ -10212,7 +10310,7 @@ "h": 8, "w": 12, "x": 0, - "y": 120 + "y": 24 }, "hiddenSeries": false, "id": 163, @@ -10317,7 +10415,7 @@ "h": 8, "w": 12, "x": 12, - "y": 120 + "y": 24 }, "hiddenSeries": false, "id": 162, @@ -10437,7 +10535,7 @@ "h": 8, "w": 12, "x": 0, - "y": 128 + "y": 32 }, "hiddenSeries": false, "id": 164, @@ -10481,6 +10579,17 @@ "legendFormat": "num_pages-{{instance}}", "refId": "A", "step": 10 + }, + { + "exemplar": true, + "expr": "tiflash_system_asynchronous_metric_VersionedEntries{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_entries-{{instance}}", + "refId": "B", + "step": 10 } ], "thresholds": [], @@ -10546,7 +10655,7 @@ "h": 8, "w": 12, "x": 12, - "y": 128 + "y": 32 }, "hiddenSeries": false, "id": 123, @@ -10677,7 +10786,7 @@ "h": 8, "w": 12, "x": 0, - "y": 136 + "y": 40 }, "height": "", "hiddenSeries": false, @@ -10786,7 +10895,7 @@ "h": 8, "w": 12, "x": 12, - "y": 136 + "y": 40 }, "hiddenSeries": false, "id": 231, @@ -10892,7 +11001,7 @@ "h": 9, "w": 24, "x": 0, - "y": 144 + "y": 48 }, "hiddenSeries": false, "id": 232, @@ -13721,6 +13830,78 @@ "alignLevel": null } }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateSpectral", + "exponent": 0.5, + "min": 0, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": 
"${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 121 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 290, + "legend": { + "show": true + }, + "links": [], + "reverseYBuckets": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(delta(tiflash_raft_command_throughput_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"prehandle_snapshot\"}[1m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{le}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Snapshot Prehandle Throughput Heatmap", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": 0, + "format": "bytes", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketBound": "upper", + "yBucketNumber": null, + "yBucketSize": null + }, { "cards": { "cardPadding": null, diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 120000 index 00000000000..82e224aad22 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1 @@ +contrib/tiflash-proxy/rust-toolchain.toml \ No newline at end of file