From c4ad3f1442c39fb2d76d40582e0e16e4dd853d56 Mon Sep 17 00:00:00 2001
From: YangKeao
Date: Tue, 26 Jul 2022 18:49:10 +0800
Subject: [PATCH 01/17] fix hexInt `sprintf` format pattern (#5460)

close pingcap/tiflash#5462
---
 dbms/src/Functions/FunctionsString.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index ef8583b4f93..aac766e3e53 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -5281,18 +5281,17 @@ class FunctionHexInt : public IFunction
         ColumnString::Offsets & res_offsets = col_res->getOffsets();
         res_offsets.resize(size);

-        size_t prev_res_offset = 0;
+        auto res_chars_iter = res_chars.begin();
         for (size_t i = 0; i < size; ++i)
         {
             UInt64 number = col->getUInt(i);
-            int print_size = sprintf(reinterpret_cast<char *>(&res_chars[prev_res_offset]), "%lX", number);
-            res_chars[prev_res_offset + print_size] = 0;
+            res_chars_iter = fmt::format_to(res_chars_iter, "{:X}", number);
+            *(++res_chars_iter) = 0;
             // Add the size of printed string and a tailing zero
-            prev_res_offset += print_size + 1;
-            res_offsets[i] = prev_res_offset;
+            res_offsets[i] = res_chars_iter - res_chars.begin();
         }
-        res_chars.resize(prev_res_offset);
+        res_chars.resize(res_chars_iter - res_chars.begin());

         block.getByPosition(result).column = std::move(col_res);

From 53e2c42476c0683d906d6a8fa3ea00dd7a46f9e8 Mon Sep 17 00:00:00 2001
From: jinhelin
Date: Wed, 27 Jul 2022 09:55:10 +0800
Subject: [PATCH 02/17] Fix typo of Metrics (#5472)

close pingcap/tiflash#5473
---
 dbms/src/Common/TiFlashMetrics.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h
index 5b627783544..90a2ff3aa0c 100644
--- a/dbms/src/Common/TiFlashMetrics.h
+++ b/dbms/src/Common/TiFlashMetrics.h
@@ -217,7 +217,7 @@ namespace DB
         F(type_get_cache_miss, {"type", "get_cache_miss"}),                  \
         F(type_get_cache_part, {"type", "get_cache_part"}),                  \
         F(type_get_cache_hit, {"type", "get_cache_hit"}),                    \
-        F(type_get_cache_copy, {"type", "add_cache_copy"}))                  \
+        F(type_get_cache_copy, {"type", "get_cache_copy"}))                  \
     M(tiflash_storage_read_thread_gauge, "The gauge of storage read thread", Gauge, \
         F(type_merged_task, {"type", "merged_task"}))                        \
     M(tiflash_storage_read_thread_seconds, "Bucketed histogram of read thread", Histogram, \

From 28d399165617954dd942412b7d4894c1564abc07 Mon Sep 17 00:00:00 2001
From: yanweiqi <592838129@qq.com>
Date: Wed, 27 Jul 2022 10:35:11 +0800
Subject: [PATCH 03/17] refine FlashService (#5471)

ref pingcap/tiflash#4609
---
 dbms/src/Flash/FlashService.cpp | 42 ++++++++++++++++-----------------
 dbms/src/Flash/FlashService.h   |  8 +++----
 dbms/src/Server/Server.cpp      |  4 ++--
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp
index 28e40d29d7c..1ab5a36d606 100644
--- a/dbms/src/Flash/FlashService.cpp
+++ b/dbms/src/Flash/FlashService.cpp
@@ -44,15 +44,15 @@ extern const int NOT_IMPLEMENTED;

 constexpr char tls_err_msg[] = "common name check is failed";

-FlashService::FlashService(IServer & server_)
-    : server(server_)
-    , security_config(server_.securityConfig())
+FlashService::FlashService(const TiFlashSecurityConfig & security_config_, Context & context_)
+    : security_config(security_config_)
+    , context(context_)
     , log(&Poco::Logger::get("FlashService"))
     , manual_compact_manager(std::make_unique(
-        server_.context().getGlobalContext(),
-
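          // The hexInt rewrite in PATCH 01 above works because fmt::format_to
          // writes through an output iterator and returns an iterator one past
          // the last character written, which lets the explicit sprintf/offset
          // bookkeeping be dropped. A minimal standalone sketch of the pattern
          // (buffer size and variable names are ours, not part of the diff;
          // assumes the buffer is pre-sized large enough):
          //
          //     #include <fmt/format.h>
          //     #include <cstdint>
          //     #include <vector>
          //
          //     std::vector<char> buf(32);
          //     auto it = fmt::format_to(buf.begin(), "{:X}", std::uint64_t{255}); // writes "FF"
          //     *it++ = 0;                              // append the trailing zero by hand
          //     std::size_t written = it - buf.begin(); // 3 bytes: 'F', 'F', '\0'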
server_.context().getGlobalContext().getSettingsRef())) + context.getGlobalContext(), + context.getGlobalContext().getSettingsRef())) { - auto settings = server_.context().getSettingsRef(); + auto settings = context.getSettingsRef(); enable_local_tunnel = settings.enable_local_tunnel; enable_async_grpc_client = settings.enable_async_grpc_client; const size_t default_size = 2 * getNumberOfPhysicalCPUCores(); @@ -364,8 +364,8 @@ std::tuple FlashService::createDBContext(const grpc::S try { /// Create DB context. - auto context = std::make_shared(server.context()); - context->setGlobalContext(server.context()); + auto tmp_context = std::make_shared(context); + tmp_context->setGlobalContext(context); /// Set a bunch of client information. std::string user = getClientMetaVarWithDefault(grpc_context, "user", "default"); @@ -375,17 +375,17 @@ std::tuple FlashService::createDBContext(const grpc::S Int64 pos = peer.find(':'); if (pos == -1) { - return std::make_tuple(context, ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid peer address: " + peer)); + return std::make_tuple(tmp_context, ::grpc::Status(::grpc::StatusCode::INVALID_ARGUMENT, "Invalid peer address: " + peer)); } std::string client_ip = peer.substr(pos + 1); Poco::Net::SocketAddress client_address(client_ip); - context->setUser(user, password, client_address, quota_key); + tmp_context->setUser(user, password, client_address, quota_key); String query_id = getClientMetaVarWithDefault(grpc_context, "query_id", ""); - context->setCurrentQueryId(query_id); + tmp_context->setCurrentQueryId(query_id); - ClientInfo & client_info = context->getClientInfo(); + ClientInfo & client_info = tmp_context->getClientInfo(); client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; client_info.interface = ClientInfo::Interface::GRPC; @@ -393,35 +393,35 @@ std::tuple FlashService::createDBContext(const grpc::S std::string dag_records_per_chunk_str = getClientMetaVarWithDefault(grpc_context, "dag_records_per_chunk", ""); if (!dag_records_per_chunk_str.empty()) { - context->setSetting("dag_records_per_chunk", dag_records_per_chunk_str); + tmp_context->setSetting("dag_records_per_chunk", dag_records_per_chunk_str); } String max_threads = getClientMetaVarWithDefault(grpc_context, "tidb_max_tiflash_threads", ""); if (!max_threads.empty()) { - context->setSetting("max_threads", max_threads); + tmp_context->setSetting("max_threads", max_threads); LOG_FMT_INFO(log, "set context setting max_threads to {}", max_threads); } - context->setSetting("enable_async_server", is_async ? "true" : "false"); - context->setSetting("enable_local_tunnel", enable_local_tunnel ? "true" : "false"); - context->setSetting("enable_async_grpc_client", enable_async_grpc_client ? "true" : "false"); - return std::make_tuple(context, grpc::Status::OK); + tmp_context->setSetting("enable_async_server", is_async ? "true" : "false"); + tmp_context->setSetting("enable_local_tunnel", enable_local_tunnel ? "true" : "false"); + tmp_context->setSetting("enable_async_grpc_client", enable_async_grpc_client ? 
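          // createDBContext above derives a per-request Context from the
          // service-wide one: copy it, authenticate the client, then forward
          // per-query settings taken from the gRPC metadata. Condensed from the
          // surrounding code of this patch (all names appear in the diff):
          //
          //     auto tmp_context = std::make_shared<Context>(context);
          //     tmp_context->setGlobalContext(context);
          //     tmp_context->setUser(user, password, client_address, quota_key);
          //     tmp_context->setCurrentQueryId(query_id);
          //     tmp_context->setSetting("max_threads", max_threads); // only when the client sent it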
"true" : "false"); + return std::make_tuple(tmp_context, grpc::Status::OK); } catch (Exception & e) { LOG_FMT_ERROR(log, "DB Exception: {}", e.message()); - return std::make_tuple(std::make_shared(server.context()), grpc::Status(tiflashErrorCodeToGrpcStatusCode(e.code()), e.message())); + return std::make_tuple(std::make_shared(context), grpc::Status(tiflashErrorCodeToGrpcStatusCode(e.code()), e.message())); } catch (const std::exception & e) { LOG_FMT_ERROR(log, "std exception: {}", e.what()); - return std::make_tuple(std::make_shared(server.context()), grpc::Status(grpc::StatusCode::INTERNAL, e.what())); + return std::make_tuple(std::make_shared(context), grpc::Status(grpc::StatusCode::INTERNAL, e.what())); } catch (...) { LOG_FMT_ERROR(log, "other exception"); - return std::make_tuple(std::make_shared(server.context()), grpc::Status(grpc::StatusCode::INTERNAL, "other exception")); + return std::make_tuple(std::make_shared(context), grpc::Status(grpc::StatusCode::INTERNAL, "other exception")); } } diff --git a/dbms/src/Flash/FlashService.h b/dbms/src/Flash/FlashService.h index 2b39479ac49..67425a0755b 100644 --- a/dbms/src/Flash/FlashService.h +++ b/dbms/src/Flash/FlashService.h @@ -46,7 +46,7 @@ class FlashService : public tikvpb::Tikv::Service , private boost::noncopyable { public: - explicit FlashService(IServer & server_); + FlashService(const TiFlashSecurityConfig & security_config_, Context & context_); ~FlashService() override; @@ -83,8 +83,8 @@ class FlashService : public tikvpb::Tikv::Service protected: std::tuple createDBContext(const grpc::ServerContext * grpc_context) const; - IServer & server; const TiFlashSecurityConfig & security_config; + Context & context; Poco::Logger * log; bool is_async = false; bool enable_local_tunnel = false; @@ -103,8 +103,8 @@ class AsyncFlashService final : public FlashService // 48 is EstablishMPPConnection API ID of GRPC // note: if the kvrpc protocal is updated, please keep consistent with the generated code. static constexpr int EstablishMPPConnectionApiID = 48; - explicit AsyncFlashService(IServer & server) - : FlashService(server) + AsyncFlashService(const TiFlashSecurityConfig & security_config_, Context & context_) + : FlashService(security_config_, context_) { is_async = true; ::grpc::Service::MarkMethodAsync(EstablishMPPConnectionApiID); diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index fa34400d5e3..f6dfff34698 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -601,9 +601,9 @@ class Server::FlashGrpcServerHolder /// Init and register flash service. 
bool enable_async_server = server.context().getSettingsRef().enable_async_server; if (enable_async_server) - flash_service = std::make_unique(server); + flash_service = std::make_unique(server.securityConfig(), server.context()); else - flash_service = std::make_unique(server); + flash_service = std::make_unique(server.securityConfig(), server.context()); diagnostics_service = std::make_unique(server); builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, 5 * 1000)); builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS, 10 * 1000)); From e5f0eb4d305598d496eacb737e16714c7b9711cd Mon Sep 17 00:00:00 2001 From: Fu Zhe Date: Wed, 27 Jul 2022 11:21:11 +0800 Subject: [PATCH 04/17] *: improve RUNTIME_CHECK and RUNTIME_ASSERT (#5434) close pingcap/tiflash#5444 --- dbms/src/Common/Exception.cpp | 53 ++++--- dbms/src/Common/Exception.h | 150 ++++++++++++------ dbms/src/Common/FailPoint.cpp | 4 +- dbms/src/Common/FailPoint.h | 2 +- dbms/src/Common/Logger.h | 64 +++++--- dbms/src/Common/TiFlashSecurity.h | 2 +- dbms/src/Encryption/RateLimiter.cpp | 6 +- dbms/src/Encryption/RateLimiter.h | 16 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 4 +- dbms/src/Flash/Mpp/ExchangeReceiver.cpp | 2 +- dbms/src/Server/RaftConfigParser.cpp | 2 +- dbms/src/Server/RaftConfigParser.h | 2 +- dbms/src/Server/Server.cpp | 26 +-- dbms/src/Server/StorageConfigParser.cpp | 10 +- dbms/src/Server/StorageConfigParser.h | 11 +- dbms/src/Server/UserConfigParser.cpp | 2 +- dbms/src/Server/UserConfigParser.h | 3 +- .../src/Server/tests/gtest_storage_config.cpp | 8 +- dbms/src/TestUtils/FunctionTestUtils.h | 4 +- 19 files changed, 231 insertions(+), 140 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 5e95156eb5d..1ef2166e032 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -24,7 +24,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -35,7 +34,6 @@ extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_TRUNCATE_FILE; } // namespace ErrorCodes - void throwFromErrno(const std::string & s, int code, int e) { const size_t buf_size = 128; @@ -54,14 +52,18 @@ void throwFromErrno(const std::string & s, int code, int e) strcpy(buf, unknown_message); strcpy(buf + strlen(unknown_message), code); } - throw ErrnoException(s + ", errno: " + toString(e) + ", strerror: " + std::string(buf), code, e); + throw ErrnoException(s + ", errno: " + toString(e) + ", strerror: " + std::string(buf), + code, + e); #else - throw ErrnoException(s + ", errno: " + toString(e) + ", strerror: " + std::string(strerror_r(e, buf, sizeof(buf))), code, e); + throw ErrnoException(s + ", errno: " + toString(e) + ", strerror: " + std::string(strerror_r(e, buf, sizeof(buf))), + code, + e); #endif } - -void tryLogCurrentException(const char * log_name, const std::string & start_of_message) +void tryLogCurrentException(const char * log_name, + const std::string & start_of_message) { tryLogCurrentException(&Poco::Logger::get(log_name), start_of_message); } @@ -75,19 +77,22 @@ void tryLogCurrentException(const char * log_name, const std::string & start_of_ { \ } -void tryLogCurrentException(const LoggerPtr & logger, const std::string & start_of_message) +void tryLogCurrentException(const LoggerPtr & logger, + const std::string & start_of_message) { TRY_LOG_CURRENT_EXCEPTION(logger, start_of_message); } -void tryLogCurrentException(Poco::Logger * logger, const std::string & 
start_of_message) +void tryLogCurrentException(Poco::Logger * logger, + const std::string & start_of_message) { TRY_LOG_CURRENT_EXCEPTION(logger, start_of_message); } #undef TRY_LOG_CURRENT_EXCEPTION -std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace) +std::string getCurrentExceptionMessage(bool with_stacktrace, + bool check_embedded_stacktrace) { std::stringstream stream; @@ -103,8 +108,10 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded { try { - stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what(); + stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION + << ", e.code() = " << e.code() + << ", e.displayText() = " << e.displayText() + << ", e.what() = " << e.what(); } catch (...) { @@ -120,7 +127,8 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); + stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION + << ", type: " << name << ", e.what() = " << e.what(); } catch (...) { @@ -136,7 +144,8 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; + stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION + << ", type: " << name; } catch (...) { @@ -146,7 +155,6 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded return stream.str(); } - int getCurrentExceptionCode() { try @@ -171,7 +179,6 @@ int getCurrentExceptionCode() } } - void rethrowFirstException(const Exceptions & exceptions) { for (const auto & exception : exceptions) @@ -179,7 +186,6 @@ void rethrowFirstException(const Exceptions & exceptions) std::rethrow_exception(exception); } - std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace) { std::stringstream stream; @@ -200,7 +206,8 @@ std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool } } - stream << "Code: " << e.code() << ", e.displayText() = " << text << ", e.what() = " << e.what(); + stream << "Code: " << e.code() << ", e.displayText() = " << text + << ", e.what() = " << e.what(); if (with_stacktrace && !has_embedded_stack_trace) stream << ", Stack trace:\n\n" @@ -225,7 +232,6 @@ std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace) } } - std::string ExecutionStatus::serializeText() const { WriteBufferFromOwnString wb; @@ -254,11 +260,20 @@ bool ExecutionStatus::tryDeserializeText(const std::string & data) return true; } -ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message) +ExecutionStatus +ExecutionStatus::fromCurrentException(const std::string & start_of_message) { String msg = (start_of_message.empty() ? 
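    // ExecutionStatus::fromCurrentException is meant to be called from inside
    // a catch block; a usage sketch (doWork and the message are placeholders):
    //
    //     try { doWork(); }
    //     catch (...) { return ExecutionStatus::fromCurrentException("while doing work"); }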
"" : (start_of_message + ": ")) + getCurrentExceptionMessage(false, true); return ExecutionStatus(getCurrentExceptionCode(), msg); } +namespace exception_details +{ +const LoggerPtr & getDefaultFatalLogger() +{ + static const auto logger = std::make_shared("DefaultFatal", ""); + return logger; +} +} // namespace exception_details } // namespace DB diff --git a/dbms/src/Common/Exception.h b/dbms/src/Common/Exception.h index 3322c99bce0..2886ae4d5ec 100644 --- a/dbms/src/Common/Exception.h +++ b/dbms/src/Common/Exception.h @@ -14,26 +14,17 @@ #pragma once +#include #include #include -#include +#include #include #include #include - -namespace Poco -{ -class Logger; -} - - namespace DB { -class Logger; -using LoggerPtr = std::shared_ptr; - class Exception : public Poco::Exception { public: @@ -73,8 +64,8 @@ class Exception : public Poco::Exception StackTrace trace; }; - -/// Contains an additional member `saved_errno`. See the throwFromErrno function. +/// Contains an additional member `saved_errno`. See the throwFromErrno +/// function. class ErrnoException : public Exception { public: @@ -97,33 +88,33 @@ class ErrnoException : public Exception int saved_errno; }; - using Exceptions = std::vector; - [[noreturn]] void throwFromErrno(const std::string & s, int code = 0, int e = errno); - /** Try to write an exception to the log (and forget about it). - * Can be used in destructors in the catch-all block. - */ -void tryLogCurrentException(const char * log_name, const std::string & start_of_message = ""); -void tryLogCurrentException(const LoggerPtr & logger, const std::string & start_of_message = ""); -void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = ""); - + * Can be used in destructors in the catch-all block. + */ +void tryLogCurrentException(const char * log_name, + const std::string & start_of_message = ""); +void tryLogCurrentException(const LoggerPtr & logger, + const std::string & start_of_message = ""); +void tryLogCurrentException(Poco::Logger * logger, + const std::string & start_of_message = ""); /** Prints current exception in canonical format. - * with_stacktrace - prints stack trace for DB::Exception. - * check_embedded_stacktrace - if DB::Exception has embedded stacktrace then - * only this stack trace will be printed. - */ -std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false); + * with_stacktrace - prints stack trace for DB::Exception. + * check_embedded_stacktrace - if DB::Exception has embedded stacktrace then + * only this stack trace will be printed. 
+ */ +std::string getCurrentExceptionMessage(bool with_stacktrace, + bool check_embedded_stacktrace = false); /// Returns error code from ErrorCodes int getCurrentExceptionCode(); - -/// An execution status of any piece of code, contains return code and optional error +/// An execution status of any piece of code, contains return code and optional +/// error struct ExecutionStatus { int code = 0; @@ -131,12 +122,14 @@ struct ExecutionStatus ExecutionStatus() = default; - explicit ExecutionStatus(int return_code, const std::string & exception_message = "") + explicit ExecutionStatus(int return_code, + const std::string & exception_message = "") : code(return_code) , message(exception_message) {} - static ExecutionStatus fromCurrentException(const std::string & start_of_message = ""); + static ExecutionStatus + fromCurrentException(const std::string & start_of_message = ""); std::string serializeText() const; @@ -145,14 +138,11 @@ struct ExecutionStatus bool tryDeserializeText(const std::string & data); }; - std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false); std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace); - void rethrowFirstException(const Exceptions & exceptions); - template std::enable_if_t, T> exception_cast(std::exception_ptr e) { @@ -172,28 +162,88 @@ std::enable_if_t, T> exception_cast(std::exception_ptr e) namespace exception_details { -template -inline std::string generateLogMessage(const char * condition, T && fmt_str, Args &&... args) +inline std::string generateFormattedMessage(const char * condition) +{ + return fmt::format("Assert {} fail!", condition); +} + +template +inline std::string generateFormattedMessage(const char * condition, const char * fmt_str, Args &&... args) +{ + return FmtBuffer().fmtAppend("Assert {} fail! ", condition).fmtAppend(fmt_str, std::forward(args)...).toString(); +} + +template +inline Poco::Message generateLogMessage(const std::string & logger_name, const char * filename, int lineno, const char * condition, Args &&... args) +{ + return Poco::Message( + logger_name, + generateFormattedMessage(condition, std::forward(args)...), + Poco::Message::PRIO_FATAL, + filename, + lineno); +} + +const LoggerPtr & getDefaultFatalLogger(); + +template +inline void log(const char * filename, int lineno, const char * condition, const LoggerPtr & logger, Args &&... args) +{ + if (logger->fatal()) + { + auto message = generateLogMessage( + logger->name(), + filename, + lineno, + condition, + std::forward(args)...); + logger->log(message); + } +} + +inline void log(const char * filename, int lineno, const char * condition) +{ + log(filename, lineno, condition, getDefaultFatalLogger()); +} + +template +inline void log(const char * filename, int lineno, const char * condition, const char * fmt_str, Args &&... args) { - return fmt::format(std::forward(fmt_str), condition, std::forward(args)...); + log(filename, lineno, condition, getDefaultFatalLogger(), fmt_str, std::forward(args)...); } } // namespace exception_details -#define RUNTIME_CHECK(condition, ExceptionType, ...) \ - do \ - { \ - if (unlikely(!(condition))) \ - throw ExceptionType(__VA_ARGS__); \ +/// Usage: +/// ``` +/// RUNTIME_CHECK(a != b, Exception("{} does not equal to {}", a, b)); +/// ``` +#define RUNTIME_CHECK(condition, ExceptionGenerationCode) \ + do \ + { \ + if (unlikely(!(condition))) \ + throw(ExceptionGenerationCode); \ } while (false) -#define RUNTIME_ASSERT(condition, logger, ...) 
\ - do \ - { \ - if (unlikely(!(condition))) \ - { \ - LOG_FATAL((logger), exception_details::generateLogMessage(#condition, "Assert {} fail! " __VA_ARGS__)); \ - std::terminate(); \ - } \ +/// Usage: +/// ``` +/// RUNTIME_ASSERT(a != b); +/// RUNTIME_ASSERT(a != b, "fail"); +/// RUNTIME_ASSERT(a != b, "{} does not equal to {}", a, b); +/// RUNTIME_ASSERT(a != b, logger); +/// RUNTIME_ASSERT(a != b, logger, "{} does not equal to {}", a, b); +/// ``` +#define RUNTIME_ASSERT(condition, ...) \ + do \ + { \ + if (unlikely(!(condition))) \ + { \ + exception_details::log( \ + &__FILE__[LogFmtDetails::getFileNameOffset(__FILE__)], \ + __LINE__, \ + #condition, \ + ##__VA_ARGS__); \ + std::terminate(); \ + } \ } while (false) } // namespace DB diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index ad5010d7826..fe04d68eac8 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -227,7 +227,7 @@ void FailPointHelper::wait(const String & fail_point_name) } } -void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log) { String random_fail_point_cfg = config.getString("flash.random_fail_points", ""); if (random_fail_point_cfg.empty()) @@ -272,7 +272,7 @@ void FailPointHelper::disableFailPoint(const String &) {} void FailPointHelper::wait(const String &) {} -void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration &, Poco::Logger *) {} +void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration &, const LoggerPtr &) {} void FailPointHelper::enableRandomFailPoint(const String &, double) {} #endif diff --git a/dbms/src/Common/FailPoint.h b/dbms/src/Common/FailPoint.h index 31df2dbdcd2..dc0913fc620 100644 --- a/dbms/src/Common/FailPoint.h +++ b/dbms/src/Common/FailPoint.h @@ -60,7 +60,7 @@ class FailPointHelper * 2. Parse flash.random_fail_points, which expect to has "FailPointA-RatioA,FailPointB-RatioB,..." format * 3. Call enableRandomFailPoint method with parsed FailPointName and Rate */ - static void initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + static void initRandomFailPoints(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log); static void enableRandomFailPoint(const String & fail_point_name, double rate); diff --git a/dbms/src/Common/Logger.h b/dbms/src/Common/Logger.h index 02aaa4d8cbe..d457dfac294 100644 --- a/dbms/src/Common/Logger.h +++ b/dbms/src/Common/Logger.h @@ -27,13 +27,15 @@ using LoggerPtr = std::shared_ptr; /** * Logger is to support identifiers based on Poco::Logger. * - * Identifiers could be request_id, session_id, etc. They can be used in `LogSearch` when we want to - * glob all logs related to one request/session/query. + * Identifiers could be request_id, session_id, etc. They can be used in + * `LogSearch` when we want to glob all logs related to one + * request/session/query. * - * Logger will print all identifiers at the front of each log record (and after the `source`). + * Logger will print all identifiers at the front of each log record (and after + * the `source`). * - * Interfaces in Logger are definitely the same with the Poco::Logger, so that they could use the same - * macro such as LOG_INFO() etc. + * Interfaces in Logger are definitely the same with the Poco::Logger, so that + * they could use the same macro such as LOG_INFO() etc. 
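 * A usage sketch (the identifier value is illustrative):
 *
 *     auto logger = std::make_shared<Logger>("CoprocessorHandler", "request_id=42");
 *     LOG_INFO(logger, "start handling"); // the record carries "request_id=42"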
*/ class Logger : private boost::noncopyable { @@ -59,23 +61,24 @@ class Logger : private boost::noncopyable Logger(const std::string & source, const std::string & identifier) : Logger(&Poco::Logger::get(source), identifier) - { - } + {} Logger(Poco::Logger * source_log, const std::string & identifier) : logger(source_log) , id(identifier) - { - } - -#define M(level) \ - bool level() const { return logger->level(); } \ - void level(const std::string & msg) const \ - { \ - if (id.empty()) \ - logger->level(msg); \ - else \ - logger->level(wrapMsg(msg)); \ + {} + +#define M(level) \ + bool level() const \ + { \ + return logger->level(); \ + } \ + void level(const std::string & msg) const \ + { \ + if (id.empty()) \ + logger->level(msg); \ + else \ + logger->level(wrapMsg(msg)); \ } M(trace) @@ -101,15 +104,30 @@ class Logger : private boost::noncopyable return logger->log(msg); } - bool is(int level) const { return logger->is(level); } + bool is(int level) const + { + return logger->is(level); + } - Poco::Channel * getChannel() const { return logger->getChannel(); } + Poco::Channel * getChannel() const + { + return logger->getChannel(); + } - const std::string & name() const { return logger->name(); } + const std::string & name() const + { + return logger->name(); + } - const std::string & identifier() const { return id; } + const std::string & identifier() const + { + return id; + } - Poco::Logger * getLog() const { return logger; } + Poco::Logger * getLog() const + { + return logger; + } private: template diff --git a/dbms/src/Common/TiFlashSecurity.h b/dbms/src/Common/TiFlashSecurity.h index 8dde3fe5a98..56b98b5b02e 100644 --- a/dbms/src/Common/TiFlashSecurity.h +++ b/dbms/src/Common/TiFlashSecurity.h @@ -54,7 +54,7 @@ struct TiFlashSecurityConfig public: TiFlashSecurityConfig() = default; - TiFlashSecurityConfig(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) + TiFlashSecurityConfig(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log) { if (config.has("security")) { diff --git a/dbms/src/Encryption/RateLimiter.cpp b/dbms/src/Encryption/RateLimiter.cpp index 38fd8468341..8c986f93f71 100644 --- a/dbms/src/Encryption/RateLimiter.cpp +++ b/dbms/src/Encryption/RateLimiter.cpp @@ -298,7 +298,7 @@ ReadLimiter::ReadLimiter( , getIOStatistic(std::move(getIOStatistic_)) , last_stat_bytes(getIOStatistic()) , last_stat_time(now()) - , log(&Poco::Logger::get("ReadLimiter")) + , log(Logger::get("ReadLimiter")) , get_io_statistic_period_us(get_io_stat_period_us) {} @@ -382,7 +382,7 @@ void ReadLimiter::refillAndAlloc() } IORateLimiter::IORateLimiter() - : log(&Poco::Logger::get("IORateLimiter")) + : log(Logger::get("IORateLimiter")) , stop(false) {} @@ -693,7 +693,7 @@ IOLimitTuner::IOLimitTuner( , bg_read_stat(std::move(bg_read_stat_)) , fg_read_stat(std::move(fg_read_stat_)) , io_config(io_config_) - , log(&Poco::Logger::get("IOLimitTuner")) + , log(Logger::get("IOLimitTuner")) {} IOLimitTuner::TuneResult IOLimitTuner::tune() const diff --git a/dbms/src/Encryption/RateLimiter.h b/dbms/src/Encryption/RateLimiter.h index f44beeb8ed7..9d29ea10e0a 100644 --- a/dbms/src/Encryption/RateLimiter.h +++ b/dbms/src/Encryption/RateLimiter.h @@ -185,7 +185,7 @@ class ReadLimiter : public WriteLimiter return std::chrono::time_point_cast(std::chrono::system_clock::now()); } TimePoint last_stat_time; - Poco::Logger * log; + LoggerPtr log; Int64 get_io_statistic_period_us; }; @@ -260,7 +260,7 @@ class IORateLimiter std::vector bg_thread_ids; IOInfo last_io_info; - Poco::Logger * log; + 
LoggerPtr log; std::atomic stop; std::thread auto_tune_thread; @@ -419,8 +419,14 @@ class IOLimitTuner High = 3, Emergency = 4 }; - Watermark writeWatermark() const { return getWatermark(writePct()); } - Watermark readWatermark() const { return getWatermark(readPct()); } + Watermark writeWatermark() const + { + return getWatermark(writePct()); + } + Watermark readWatermark() const + { + return getWatermark(readPct()); + } Watermark getWatermark(int pct) const; // Returns @@ -475,6 +481,6 @@ class IOLimitTuner LimiterStatUPtr bg_read_stat; LimiterStatUPtr fg_read_stat; StorageIORateLimitConfig io_config; - Poco::Logger * log; + LoggerPtr log; }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 7fa32c316a1..b2e344189c3 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -745,8 +745,8 @@ void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) stream = std::make_shared(stream, std::move(response_writer), log->identifier()); stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); }); - RUNTIME_CHECK(exchange_sender.tp() == tipb::ExchangeType::Hash, Exception, "exchange_sender has to be hash partition when fine grained shuffle is enabled"); - RUNTIME_CHECK(stream_count <= 1024, Exception, "fine_grained_shuffle_stream_count should not be greater than 1024"); + RUNTIME_CHECK(exchange_sender.tp() == tipb::ExchangeType::Hash, Exception("exchange_sender has to be hash partition when fine grained shuffle is enabled")); + RUNTIME_CHECK(stream_count <= 1024, Exception("fine_grained_shuffle_stream_count should not be greater than 1024")); } else { diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index ab8d83a1481..f5808952740 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -750,7 +750,7 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queuechunks.empty()); // Fine grained shuffle should only be enabled when sending data to TiFlash node. // So all data should be encoded into MPPDataPacket.chunks. - RUNTIME_CHECK(!enableFineGrainedShuffle(fine_grained_shuffle_stream_count), Exception, "Data should not be encoded into tipb::SelectResponse.chunks when fine grained shuffle is enabled"); + RUNTIME_CHECK(!enableFineGrainedShuffle(fine_grained_shuffle_stream_count), Exception("Data should not be encoded into tipb::SelectResponse.chunks when fine grained shuffle is enabled")); result.decode_detail = CoprocessorReader::decodeChunks(select_resp, block_queue, header, schema); } } diff --git a/dbms/src/Server/RaftConfigParser.cpp b/dbms/src/Server/RaftConfigParser.cpp index 2f0a88855cd..39730636431 100644 --- a/dbms/src/Server/RaftConfigParser.cpp +++ b/dbms/src/Server/RaftConfigParser.cpp @@ -30,7 +30,7 @@ extern const int INVALID_CONFIG_PARAMETER; } // namespace ErrorCodes /// Load raft related configs. 
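/// Call sites across this patch follow the same shape: obtain a shared
/// LoggerPtr once and pass it by const reference, e.g. (config object omitted):
///
///     auto log = Logger::get("RaftConfigParser"); // Logger::get as used elsewhere in this patch
///     auto raft_config = TiFlashRaftConfig::parseSettings(config, log);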
-TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log) { TiFlashRaftConfig res; res.flash_server_addr = config.getString("flash.service_addr", "0.0.0.0:3930"); diff --git a/dbms/src/Server/RaftConfigParser.h b/dbms/src/Server/RaftConfigParser.h index 0eb78ba20a8..c42304289e2 100644 --- a/dbms/src/Server/RaftConfigParser.h +++ b/dbms/src/Server/RaftConfigParser.h @@ -46,7 +46,7 @@ struct TiFlashRaftConfig public: TiFlashRaftConfig() = default; - static TiFlashRaftConfig parseSettings(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + static TiFlashRaftConfig parseSettings(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log); }; } // namespace DB diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index f6dfff34698..c03f7b7133e 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -157,7 +157,7 @@ void loadMiConfig(Logger * log) namespace { -[[maybe_unused]] void tryLoadBoolConfigFromEnv(Poco::Logger * log, bool & target, const char * name) +[[maybe_unused]] void tryLoadBoolConfigFromEnv(const DB::LoggerPtr & log, bool & target, const char * name) { auto * config = getenv(name); if (config) @@ -297,7 +297,7 @@ pingcap::ClusterConfig getClusterConfig(const TiFlashSecurityConfig & security_c return config; } -Poco::Logger * grpc_log = nullptr; +LoggerPtr grpc_log; void printGRPCLog(gpr_log_func_args * args) { @@ -344,7 +344,7 @@ struct TCPServer : Poco::Net::TCPServer } }; -void UpdateMallocConfig([[maybe_unused]] Poco::Logger * log) +void UpdateMallocConfig([[maybe_unused]] const LoggerPtr & log) { #ifdef RUN_FAIL_RETURN static_assert(false); @@ -435,7 +435,7 @@ struct RaftStoreProxyRunner : boost::noncopyable size_t stack_size = 1024 * 1024 * 20; }; - RaftStoreProxyRunner(RunRaftStoreProxyParms && parms_, Poco::Logger * log_) + RaftStoreProxyRunner(RunRaftStoreProxyParms && parms_, const LoggerPtr & log_) : parms(std::move(parms_)) , log(log_) {} @@ -470,11 +470,11 @@ struct RaftStoreProxyRunner : boost::noncopyable RunRaftStoreProxyParms parms; pthread_t thread{}; - Poco::Logger * log; + const LoggerPtr & log; }; // We only need this task run once. 
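// (Should a second caller ever appear, std::call_once from <mutex> is the
// standard one-shot guard; a sketch only, not what this diff does:
//
//     static std::once_flag init_stores_flag;
//     std::call_once(init_stores_flag, do_init_stores);
// )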
-void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_store) +void initStores(Context & global_context, const LoggerPtr & log, bool lazily_init_store) { auto do_init_stores = [&global_context, log]() { auto storages = global_context.getTMTContext().getStorages().getAllStorage(); @@ -518,7 +518,7 @@ void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_s } } -void handleRpcs(grpc::ServerCompletionQueue * curcq, Poco::Logger * log) +void handleRpcs(grpc::ServerCompletionQueue * curcq, const LoggerPtr & log) { GET_METRIC(tiflash_thread_count, type_total_rpc_async_worker).Increment(); SCOPE_EXIT({ @@ -579,7 +579,7 @@ void handleRpcs(grpc::ServerCompletionQueue * curcq, Poco::Logger * log) class Server::FlashGrpcServerHolder { public: - FlashGrpcServerHolder(Server & server, const TiFlashRaftConfig & raft_config, Poco::Logger * log_) + FlashGrpcServerHolder(Server & server, const TiFlashRaftConfig & raft_config, const LoggerPtr & log_) : log(log_) , is_shutdown(std::make_shared>(false)) { @@ -695,7 +695,7 @@ class Server::FlashGrpcServerHolder } private: - Poco::Logger * log; + const LoggerPtr & log; std::shared_ptr> is_shutdown; std::unique_ptr flash_service = nullptr; std::unique_ptr diagnostics_service = nullptr; @@ -709,7 +709,7 @@ class Server::FlashGrpcServerHolder class Server::TcpHttpServersHolder { public: - TcpHttpServersHolder(Server & server_, const Settings & settings, Poco::Logger * log_) + TcpHttpServersHolder(Server & server_, const Settings & settings, const LoggerPtr & log_) : server(server_) , log(log_) , server_pool(1, server.config().getUInt("max_connections", 1024)) @@ -983,7 +983,7 @@ class Server::TcpHttpServersHolder private: Server & server; - Poco::Logger * log; + const LoggerPtr & log; Poco::ThreadPool server_pool; std::vector> servers; }; @@ -992,7 +992,7 @@ int Server::main(const std::vector & /*args*/) { setThreadName("TiFlashMain"); - Poco::Logger * log = &logger(); + const auto log = std::make_shared(&logger(), ""); #ifdef FIU_ENABLE fiu_init(0); // init failpoint FailPointHelper::initRandomFailPoints(config(), log); @@ -1079,7 +1079,7 @@ int Server::main(const std::vector & /*args*/) } // print necessary grpc log. 
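// gRPC's global gpr logger is routed into TiFlash's own logger through
// gpr_set_log_function (see printGRPCLog above). The shape of such a hook,
// sketched with only the standard gpr_log_func_args fields; the level mapping
// is illustrative, assuming the usual Poco-style level methods:
//
//     void printGRPCLog(gpr_log_func_args * args)
//     {
//         String msg = fmt::format("{} ({}:{})", args->message, args->file, args->line);
//         if (args->severity == GPR_LOG_SEVERITY_DEBUG)
//             grpc_log->debug(msg);
//         else if (args->severity == GPR_LOG_SEVERITY_INFO)
//             grpc_log->information(msg);
//         else
//             grpc_log->error(msg);
//     }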
- grpc_log = &Poco::Logger::get("grpc"); + grpc_log = Logger::get("grpc"); gpr_set_log_verbosity(GPR_LOG_SEVERITY_DEBUG); gpr_set_log_function(&printGRPCLog); diff --git a/dbms/src/Server/StorageConfigParser.cpp b/dbms/src/Server/StorageConfigParser.cpp index d43ccb850f1..09b7807c397 100644 --- a/dbms/src/Server/StorageConfigParser.cpp +++ b/dbms/src/Server/StorageConfigParser.cpp @@ -61,7 +61,7 @@ static String getNormalizedPath(const String & s) return getCanonicalPath(Poco::Path{s}.toString()); } -void TiFlashStorageConfig::parseStoragePath(const String & storage, Poco::Logger * log) +void TiFlashStorageConfig::parseStoragePath(const String & storage, const LoggerPtr & log) { std::istringstream ss(storage); cpptoml::parser p(ss); @@ -181,7 +181,7 @@ void TiFlashStorageConfig::parseStoragePath(const String & storage, Poco::Logger } } -void TiFlashStorageConfig::parseMisc(const String & storage_section, Poco::Logger * log) +void TiFlashStorageConfig::parseMisc(const String & storage_section, const LoggerPtr & log) { std::istringstream ss(storage_section); cpptoml::parser p(ss); @@ -233,7 +233,7 @@ Strings TiFlashStorageConfig::getAllNormalPaths() const return all_normal_path; } -bool TiFlashStorageConfig::parseFromDeprecatedConfiguration(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +bool TiFlashStorageConfig::parseFromDeprecatedConfiguration(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log) { if (!config.has("path")) return false; @@ -302,7 +302,7 @@ bool TiFlashStorageConfig::parseFromDeprecatedConfiguration(Poco::Util::LayeredC return true; } -std::tuple TiFlashStorageConfig::parseSettings(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) +std::tuple TiFlashStorageConfig::parseSettings(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log) { size_t global_capacity_quota = 0; // "0" by default, means no quota, use the whole disk capacity. 
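    // parseSettings returns a std::tuple<size_t, TiFlashStorageConfig> (see
    // the make_tuple at the end of this function); a call-site sketch with
    // C++17 structured bindings:
    //
    //     auto [global_capacity_quota, storage_config] = TiFlashStorageConfig::parseSettings(config, log);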
TiFlashStorageConfig storage_config; @@ -379,7 +379,7 @@ std::tuple TiFlashStorageConfig::parseSettings(Poc return std::make_tuple(global_capacity_quota, storage_config); } -void StorageIORateLimitConfig::parse(const String & storage_io_rate_limit, Poco::Logger * log) +void StorageIORateLimitConfig::parse(const String & storage_io_rate_limit, const LoggerPtr & log) { std::istringstream ss(storage_io_rate_limit); cpptoml::parser p(ss); diff --git a/dbms/src/Server/StorageConfigParser.h b/dbms/src/Server/StorageConfigParser.h index 4efc5637634..f3a779d2f63 100644 --- a/dbms/src/Server/StorageConfigParser.h +++ b/dbms/src/Server/StorageConfigParser.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -75,7 +76,7 @@ struct StorageIORateLimitConfig , auto_tune_sec(5) {} - void parse(const String & storage_io_rate_limit, Poco::Logger * log); + void parse(const String & storage_io_rate_limit, const LoggerPtr & log); std::string toString() const; @@ -109,14 +110,14 @@ struct TiFlashStorageConfig Strings getAllNormalPaths() const; - static std::tuple parseSettings(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + static std::tuple parseSettings(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log); private: - void parseStoragePath(const String & storage_section, Poco::Logger * log); + void parseStoragePath(const String & storage_section, const LoggerPtr & log); - bool parseFromDeprecatedConfiguration(Poco::Util::LayeredConfiguration & config, Poco::Logger * log); + bool parseFromDeprecatedConfiguration(Poco::Util::LayeredConfiguration & config, const LoggerPtr & log); - void parseMisc(const String & storage_section, Poco::Logger * log); + void parseMisc(const String & storage_section, const LoggerPtr & log); }; diff --git a/dbms/src/Server/UserConfigParser.cpp b/dbms/src/Server/UserConfigParser.cpp index 9d17ce4628c..21b5ea67b3e 100644 --- a/dbms/src/Server/UserConfigParser.cpp +++ b/dbms/src/Server/UserConfigParser.cpp @@ -50,7 +50,7 @@ ConfigReloaderPtr parseSettings( Poco::Util::LayeredConfiguration & config, const std::string & config_path, std::unique_ptr & global_context, - Poco::Logger * log) + const LoggerPtr & log) { std::string users_config_path = config.getString("users_config", String(1, '\0')); bool load_from_main_config_path = true; diff --git a/dbms/src/Server/UserConfigParser.h b/dbms/src/Server/UserConfigParser.h index 17e2f6a7029..395cf71b3ed 100644 --- a/dbms/src/Server/UserConfigParser.h +++ b/dbms/src/Server/UserConfigParser.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -38,7 +39,7 @@ ConfigReloaderPtr parseSettings( Poco::Util::LayeredConfiguration & config, const std::string & config_path, std::unique_ptr & global_context, - Poco::Logger * log); + const LoggerPtr & log); } } // namespace DB diff --git a/dbms/src/Server/tests/gtest_storage_config.cpp b/dbms/src/Server/tests/gtest_storage_config.cpp index 9f6b64526f4..bfccabe304c 100644 --- a/dbms/src/Server/tests/gtest_storage_config.cpp +++ b/dbms/src/Server/tests/gtest_storage_config.cpp @@ -37,13 +37,13 @@ class StorageConfigTest : public ::testing::Test { public: StorageConfigTest() - : log(&Poco::Logger::get("StorageConfigTest")) + : log(Logger::get("StorageConfigTest")) {} static void SetUpTestCase() {} protected: - Poco::Logger * log; + LoggerPtr log; }; TEST_F(StorageConfigTest, SimpleSinglePath) @@ -449,7 +449,7 @@ dir=["/data0/tiflash"] capacity=[ 1024 ] )", }; - Poco::Logger * log = &Poco::Logger::get("PathCapacityMetrics_test"); + auto log = 
Logger::get("PathCapacityMetrics_test"); for (size_t i = 0; i < tests.size(); ++i) { @@ -603,7 +603,7 @@ background_read_weight=2 )", }; - Poco::Logger * log = &Poco::Logger::get("StorageIORateLimitConfigTest"); + auto log = Logger::get("StorageIORateLimitConfigTest"); auto verify_default = [](const StorageIORateLimitConfig & io_config) { ASSERT_EQ(io_config.max_bytes_per_sec, 0); diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index 8680d1886b1..e75acabe56c 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -398,7 +398,7 @@ typename TypeTraits::FieldType parseDecimal( const String & literal = [&] { if constexpr (Traits::is_nullable) { - assert(literal_.has_value()); + RUNTIME_ASSERT(literal_.has_value()); return literal_.value(); } else @@ -761,7 +761,7 @@ class FunctionTest : public ::testing::Test DAGContext & getDAGContext() { - assert(dag_context_ptr != nullptr); + RUNTIME_ASSERT(dag_context_ptr != nullptr); return *dag_context_ptr; } From ee2f753ac4c3ed6d0b4c281b3a35db7296d6aeeb Mon Sep 17 00:00:00 2001 From: SeaRise Date: Wed, 27 Jul 2022 15:23:11 +0800 Subject: [PATCH 05/17] refine Window Executor Test (#5464) ref pingcap/tiflash#4609, close pingcap/tiflash#5465 --- .../src/Flash/tests/gtest_window_executor.cpp | 216 ++++++++++++++++++ .../tests/gtest_window_functions.cpp | 191 ---------------- 2 files changed, 216 insertions(+), 191 deletions(-) create mode 100644 dbms/src/Flash/tests/gtest_window_executor.cpp delete mode 100644 dbms/src/WindowFunctions/tests/gtest_window_functions.cpp diff --git a/dbms/src/Flash/tests/gtest_window_executor.cpp b/dbms/src/Flash/tests/gtest_window_executor.cpp new file mode 100644 index 00000000000..d09122e5815 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_window_executor.cpp @@ -0,0 +1,216 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
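// Assertion convention used in this file (sketch; assuming ASSERT_COLUMNS_EQ_R
// compares rows in order while ASSERT_COLUMNS_EQ_UR compares them unordered,
// which matters once several streams may interleave rows):
//
//     ASSERT_COLUMNS_EQ_R(expect, executeStreams(request));     // single stream
//     ASSERT_COLUMNS_EQ_UR(expect, executeStreams(request, 4)); // four streams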
+ +#include + +namespace DB::tests +{ +class WindowExecutorTestRunner : public DB::tests::ExecutorTest +{ + static const size_t max_concurrency_level = 10; + +public: + void initializeContext() override + { + ExecutorTest::initializeContext(); + context.addMockTable( + {"test_db", "test_table"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); + context.addMockTable( + {"test_db", "test_table_string"}, + {{"partition", TiDB::TP::TypeString}, {"order", TiDB::TP::TypeString}}, + {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), + toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}); + + context.addMockTable( + {"test_db", "test_table_more_cols"}, + {{"partition1", TiDB::TP::TypeLongLong}, {"partition2", TiDB::TP::TypeLongLong}, {"order1", TiDB::TP::TypeLongLong}, {"order2", TiDB::TP::TypeLongLong}}, + {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), + toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}); + + context.addMockTable( + {"test_db", "test_table_float64"}, + {{"partition", TiDB::TP::TypeDouble}, {"order", TiDB::TP::TypeDouble}}, + {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}); + + context.addMockTable( + {"test_db", "test_table_datetime"}, + {{"partition", TiDB::TP::TypeDatetime}, {"order", TiDB::TP::TypeDatetime}}); + + context.addMockTable( + {"test_db", "test_table_for_rank"}, + {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, + {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); + } + + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request)); + for (size_t i = 1; i <= max_concurrency_level; ++i) + { + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); + } + } + + void executeWithTableScanAndConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns) + { + ASSERT_COLUMNS_EQ_R(expect_columns, executeStreamsWithSingleSource(request, source_columns, SourceType::TableScan)); + for (size_t i = 1; i <= max_concurrency_level; ++i) + { + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreamsWithSingleSource(request, source_columns, SourceType::TableScan)); + } + } +}; + +TEST_F(WindowExecutorTestRunner, testWindowFunctionByPartitionAndOrder) +try +{ + /***** row_number with different types of input *****/ + // int - sql : select *, row_number() over w1 from test1 window w1 as (partition by partition_int order by order_int) + auto request = context + .scan("test_db", "test_table") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + executeWithConcurrency( + request, + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + + // null input + executeWithTableScanAndConcurrency(request, + 
{toNullableVec("partition", {}), toNullableVec("order", {})}, + createColumns({})); + + // nullable + executeWithTableScanAndConcurrency( + request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}, + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); + + // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) + request = context + .scan("test_db", "test_table_string") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + + executeWithConcurrency(request, + createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + + // nullable + executeWithTableScanAndConcurrency(request, + {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), + toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}, + createColumns({toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), + toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); + + // float64 - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_float64) + request = context + .scan("test_db", "test_table_float64") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + + executeWithConcurrency(request, + createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + + // nullable + executeWithTableScanAndConcurrency(request, + {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}, + createColumns({toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), + toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); + + // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); + request = context + .scan("test_db", "test_table_datetime") + .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) + .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) + .build(context); + executeWithTableScanAndConcurrency(request, + {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toDatetimeVec("order", 
{"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, + createColumns({toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); + + // nullable + executeWithTableScanAndConcurrency(request, + {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), + toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}, + createColumns({toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), + toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), + toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); + + // 2 partiton key and 2 order key + // sql : select *, row_number() over w1 from test6 window w1 as (partition by partition_int1, partition_int2 order by order_int1,order_int2) + request = context + .scan("test_db", "test_table_more_cols") + .sort({{"partition1", false}, {"partition2", false}, {"order1", false}, {"order2", false}}, true) + .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) + .build(context); + + executeWithConcurrency(request, + createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), + toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), + toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), + toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), + toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); + + /***** rank, dense_rank *****/ + request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); + executeWithConcurrency(request, + createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); + + // nullable + executeWithTableScanAndConcurrency(request, + {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}, + createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})})); + + executeWithTableScanAndConcurrency( + request, + {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}, + 
createColumns({toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), + toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), + toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), + toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})})); +} +CATCH + +} // namespace DB::tests diff --git a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp b/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp deleted file mode 100644 index 06253cac66e..00000000000 --- a/dbms/src/WindowFunctions/tests/gtest_window_functions.cpp +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -namespace DB::tests -{ -class WindowExecutorTestRunner : public DB::tests::ExecutorTest -{ -public: - void initializeContext() override - { - ExecutorTest::initializeContext(); - context.addMockTable( - {"test_db", "test_table"}, - {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); - context.addMockTable( - {"test_db", "test_table_string"}, - {{"partition", TiDB::TP::TypeString}, {"order", TiDB::TP::TypeString}}, - {toVec("partition", {"banana", "banana", "banana", "banana", "apple", "apple", "apple", "apple"}), - toVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"})}); - - context.addMockTable( - {"test_db", "test_table_more_cols"}, - {{"partition1", TiDB::TP::TypeLongLong}, {"partition2", TiDB::TP::TypeLongLong}, {"order1", TiDB::TP::TypeLongLong}, {"order2", TiDB::TP::TypeLongLong}}, - {toVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), - toVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), - toVec("order1", {2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1}), - toVec("order2", {2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1})}); - - context.addMockTable( - {"test_db", "test_table_float64"}, - {{"partition", TiDB::TP::TypeDouble}, {"order", TiDB::TP::TypeDouble}}, - {toVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}); - - context.addMockTable( - {"test_db", "test_table_datetime"}, - {{"partition", TiDB::TP::TypeDatetime}, {"order", TiDB::TP::TypeDatetime}}); - - context.addMockTable( - {"test_db", "test_table_for_rank"}, - {{"partition", TiDB::TP::TypeLongLong}, {"order", TiDB::TP::TypeLongLong}}, - {toVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toVec("order", {1, 1, 2, 2, 1, 1, 2, 2})}); - } -}; - -TEST_F(WindowExecutorTestRunner, testWindowFunctionByPartitionAndOrder) -try -{ - /***** row_number with different types of input *****/ - // int - sql : select *, row_number() over w1 from test1 window w1 as (partition by partition_int order by order_int) - auto request = context - .scan("test_db", "test_table") - .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) - .window(RowNumber(), {"order", false}, {"partition", false}, 
buildDefaultRowsFrame()) - .build(context); - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); - - // null input - executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {}), toNullableVec("order", {})}, - {}); - - // nullable - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), {toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2})}}), - createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); - - // string - sql : select *, row_number() over w1 from test2 window w1 as (partition by partition_string order by order_string) - request = context - .scan("test_db", "test_table_string") - .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) - .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) - .build(context); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec("partition", {"apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), - toNullableVec("order", {"apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); - - // nullable - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, - {toNullableVec("partition", {"banana", "banana", "banana", "banana", {}, "apple", "apple", "apple", "apple"}), - toNullableVec("order", {"apple", "apple", "banana", "banana", {}, "apple", "apple", "banana", "banana"})}), - createColumns({toNullableVec("partition", {{}, "apple", "apple", "apple", "apple", "banana", "banana", "banana", "banana"}), - toNullableVec("order", {{}, "apple", "apple", "banana", "banana", "apple", "apple", "banana", "banana"}), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); - - // float64 - sql : select *, row_number() over w1 from test3 window w1 as (partition by partition_float order by order_float64) - request = context - .scan("test_db", "test_table_float64") - .sort({{"partition", false}, {"order", false}, {"partition", false}, {"order", false}}, true) - .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) - .build(context); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec("partition", {1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); - - // nullable - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, - {toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00})}), - createColumns({toNullableVec("partition", {{}, 1.00, 1.00, 1.00, 1.00, 2.00, 2.00, 2.00, 2.00}), - toNullableVec("order", {{}, 1.00, 1.00, 2.00, 2.00, 1.00, 1.00, 2.00, 2.00}), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); - - // datetime - select *, row_number() over w1 from test4 window w1 as (partition by partition_datetime order by order_datetime); - request = context - .scan("test_db", "test_table_datetime") - .sort({{"partition", false}, {"order", false}, {"partition", false}, 
{"order", false}}, true) - .window(RowNumber(), {"order", false}, {"partition", false}, buildDefaultRowsFrame()) - .build(context); - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, - {toNullableDatetimeVec("partition", {"20220101010102", "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), - createColumns({toNullableDatetimeVec("partition", {"20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toNullableDatetimeVec("order", {"20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 2, 3, 4, 1, 2, 3, 4})})); - - // nullable - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, - {toNullableDatetimeVec("partition", {"20220101010102", {}, "20220101010102", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010101", "20220101010101"}, 0), - toNullableDatetimeVec("order", {"20220101010101", {}, "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0)}), - createColumns({toNullableDatetimeVec("partition", {{}, "20220101010101", "20220101010101", "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010102", "20220101010102"}, 0), - toNullableDatetimeVec("order", {{}, "20220101010101", "20220101010101", "20220101010102", "20220101010102", "20220101010101", "20220101010101", "20220101010102", "20220101010102"}, 0), - toNullableVec("row_number", {1, 1, 2, 3, 4, 1, 2, 3, 4})})); - - // 2 partiton key and 2 order key - // sql : select *, row_number() over w1 from test6 window w1 as (partition by partition_int1, partition_int2 order by order_int1,order_int2) - request = context - .scan("test_db", "test_table_more_cols") - .sort({{"partition1", false}, {"partition2", false}, {"order1", false}, {"order2", false}}, true) - .window(RowNumber(), {{"order1", false}, {"order2", false}}, {{"partition1", false}, {"partition2", false}}, buildDefaultRowsFrame()) - .build(context); - - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec("partition1", {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}), - toNullableVec("partition2", {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}), - toNullableVec("order1", {1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2}), - toNullableVec("order2", {1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2}), - toNullableVec("row_number", {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3})})); - - /***** rank, dense_rank *****/ - request = context.scan("test_db", "test_table_for_rank").sort({{"partition", false}, {"order", false}}, true).window({Rank(), DenseRank()}, {{"order", false}}, {{"partition", false}}, MockWindowFrame{}).build(context); - ASSERT_COLUMNS_EQ_R(executeStreams(request), - createColumns({toNullableVec("partition", {1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 1, 2, 2, 1, 1, 2, 2})})); - - // nullable - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource(request, - {toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 
2})}), - createColumns({toNullableVec("partition", {{}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 1, 1, 2, 2, 1, 1, 2, 2})})); - - ASSERT_COLUMNS_EQ_R(executeStreamsWithSingleSource( - request, - {toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2})}), - createColumns({toNullableVec("partition", {{}, {}, 1, 1, 1, 1, 2, 2, 2, 2}), - toNullableVec("order", {{}, 1, 1, 1, 2, 2, 1, 1, 2, 2}), - toNullableVec("rank", {1, 2, 1, 1, 3, 3, 1, 1, 3, 3}), - toNullableVec("dense_rank", {1, 2, 1, 1, 2, 2, 1, 1, 2, 2})})); -} -CATCH - -} // namespace DB::tests From 72f7481fe641f3c3dc43bdc5c4984ac23fb8f45b Mon Sep 17 00:00:00 2001 From: jinhelin Date: Wed, 27 Jul 2022 18:09:01 +0800 Subject: [PATCH 06/17] Add metrics of Read Thread and Cache (#5476) Co-authored-by: Flowyi --- metrics/grafana/tiflash_summary.json | 135 +++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 0d72f950add..377cc8f0abc 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -5523,6 +5523,141 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 71 + }, + "hiddenSeries": false, + "id": 132, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:327", + "alias": "cache_hit_ratio", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy|sche_no_segment\"}[1m])) by (type)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_hit|get_cache_copy\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "get_cache_hit", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "get_cache_total", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", 
type=~\"get_cache_hit|get_cache_copy\"}[1m]))/sum(rate(tiflash_storage_read_thread_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"get_cache_miss|get_cache_hit|get_cache_part|get_cache_copy\"}[1m]))", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cache_hit_ratio", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Thread and Cache", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:198", + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:199", + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "repeat": null, From 245e2fbc586ff11df309d1f01d878d59a88c387b Mon Sep 17 00:00:00 2001 From: bestwoody <89765764+bestwoody@users.noreply.github.com> Date: Wed, 27 Jul 2022 21:59:11 +0800 Subject: [PATCH 07/17] bugfix for core of GRPCCompQueue when tiflash shutdown (#5483) close pingcap/tiflash#5480 --- dbms/src/Flash/Mpp/GRPCCompletionQueuePool.cpp | 5 ++++- dbms/src/Flash/Mpp/GRPCCompletionQueuePool.h | 6 ++++++ dbms/src/Server/Server.cpp | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.cpp b/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.cpp index 02455b2b21d..13e1bdb902a 100644 --- a/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.cpp +++ b/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.cpp @@ -44,7 +44,10 @@ void GRPCCompletionQueuePool::thread(size_t index) { GET_METRIC(tiflash_thread_count, type_threads_of_client_cq_pool).Increment(); SCOPE_EXIT({ - GET_METRIC(tiflash_thread_count, type_threads_of_client_cq_pool).Decrement(); + if (!is_shutdown) + { + GET_METRIC(tiflash_thread_count, type_threads_of_client_cq_pool).Decrement(); + } }); auto & q = queues[index]; diff --git a/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.h b/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.h index b6592e27f3b..efa2e0ec113 100644 --- a/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.h +++ b/dbms/src/Flash/Mpp/GRPCCompletionQueuePool.h @@ -39,10 +39,16 @@ class GRPCCompletionQueuePool ::grpc::CompletionQueue & pickQueue(); + void markShutdown() + { + is_shutdown = true; + } + private: void thread(size_t index); std::atomic next = 0; + std::atomic is_shutdown{false}; std::vector<::grpc::CompletionQueue> queues; std::vector workers; }; diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index c03f7b7133e..24b0dfd2a69 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -679,6 +679,8 @@ class Server::FlashGrpcServerHolder thread_manager->wait(); flash_grpc_server->Wait(); flash_grpc_server.reset(); + if (GRPCCompletionQueuePool::global_instance) + GRPCCompletionQueuePool::global_instance->markShutdown(); LOG_FMT_INFO(log, "Shut down flash grpc server"); /// Close flash service. 
From ad5201cb3e5baa429ef4180fd224514dcf6909b3 Mon Sep 17 00:00:00 2001 From: Wenxuan Date: Wed, 27 Jul 2022 23:15:11 +0800 Subject: [PATCH 08/17] test: Add tests case when deltaMergeBySplit + { flushCache | split } occurs simultaneously (#5454) ref pingcap/tiflash#5409 --- contrib/googletest | 2 +- dbms/CMakeLists.txt | 2 + dbms/src/Common/FailPoint.cpp | 13 +- dbms/src/Common/SyncPoint/Ctl.cpp | 119 +++++++++++++++++ dbms/src/Common/SyncPoint/Ctl.h | 94 ++++++++++++++ dbms/src/Common/SyncPoint/ScopeGuard.cpp | 50 ++++++++ dbms/src/Common/SyncPoint/ScopeGuard.h | 60 +++++++++ dbms/src/Common/SyncPoint/SyncChannel.h | 117 +++++++++++++++++ dbms/src/Common/SyncPoint/SyncPoint.h | 36 ++++++ dbms/src/Flash/Management/ManualCompact.cpp | 6 - .../Management/tests/gtest_manual_compact.cpp | 33 ++--- .../DeltaMerge/Delta/ColumnFileFlushTask.cpp | 3 + .../Storages/DeltaMerge/DeltaMergeStore.cpp | 5 + dbms/src/Storages/DeltaMerge/Segment.cpp | 3 + .../DeltaMerge/tests/MultiSegmentTestUtil.h | 43 +++++-- .../tests/gtest_dm_delta_merge_store.cpp | 120 ++++++++++++++++++ dbms/src/TestUtils/TiFlashTestBasic.h | 38 +++--- dbms/src/TestUtils/gtests_dbms_main.cpp | 34 +++++ 18 files changed, 712 insertions(+), 66 deletions(-) create mode 100644 dbms/src/Common/SyncPoint/Ctl.cpp create mode 100644 dbms/src/Common/SyncPoint/Ctl.h create mode 100644 dbms/src/Common/SyncPoint/ScopeGuard.cpp create mode 100644 dbms/src/Common/SyncPoint/ScopeGuard.h create mode 100644 dbms/src/Common/SyncPoint/SyncChannel.h create mode 100644 dbms/src/Common/SyncPoint/SyncPoint.h diff --git a/contrib/googletest b/contrib/googletest index d175c8bf823..2fe3bd994b3 160000 --- a/contrib/googletest +++ b/contrib/googletest @@ -1 +1 @@ -Subproject commit d175c8bf823e709d570772b038757fadf63bc632 +Subproject commit 2fe3bd994b3189899d93f1d5a881e725e046fdc2 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 3e38c2a9fdb..a6ba4ea4618 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -57,6 +57,7 @@ include(${TiFlash_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_common_io src/Common) add_headers_and_sources(clickhouse_common_io src/Common/HashTable) +add_headers_and_sources(clickhouse_common_io src/Common/SyncPoint) add_headers_and_sources(clickhouse_common_io src/IO) add_headers_and_sources(dbms src/Analyzers) @@ -272,6 +273,7 @@ if (ENABLE_TESTS) include (${TiFlash_SOURCE_DIR}/cmake/find_gtest.cmake) if (USE_INTERNAL_GTEST_LIBRARY) + set(INSTALL_GTEST OFF) # Google Test from sources add_subdirectory(${TiFlash_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest) # avoid problems with diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index fe04d68eac8..769724052f7 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -104,13 +104,12 @@ std::unordered_map> FailPointHelper::f M(pause_until_apply_raft_snapshot) \ M(pause_after_copr_streams_acquired_once) -#define APPLY_FOR_PAUSEABLE_FAILPOINTS(M) \ - M(pause_when_reading_from_dt_stream) \ - M(pause_when_writing_to_dt_store) \ - M(pause_when_ingesting_to_dt_store) \ - M(pause_when_altering_dt_store) \ - M(pause_after_copr_streams_acquired) \ - M(pause_before_server_merge_one_delta) \ +#define APPLY_FOR_PAUSEABLE_FAILPOINTS(M) \ + M(pause_when_reading_from_dt_stream) \ + M(pause_when_writing_to_dt_store) \ + M(pause_when_ingesting_to_dt_store) \ + M(pause_when_altering_dt_store) \ + M(pause_after_copr_streams_acquired) \ M(pause_query_init) diff --git 
a/dbms/src/Common/SyncPoint/Ctl.cpp b/dbms/src/Common/SyncPoint/Ctl.cpp new file mode 100644 index 00000000000..a13ea936f0f --- /dev/null +++ b/dbms/src/Common/SyncPoint/Ctl.cpp @@ -0,0 +1,119 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +#ifdef FIU_ENABLE + +void SyncPointCtl::enable(const char * name) +{ + { + std::unique_lock lock(mu); + channels.try_emplace(name, + std::make_pair( + std::make_shared(), + std::make_shared())); + } + fiu_enable(name, 1, nullptr, 0); + LOG_FMT_DEBUG(getLogger(), "Enabled: {}", name); +} + +void SyncPointCtl::disable(const char * name) +{ + fiu_disable(name); + { + std::unique_lock lock(mu); + if (auto const & iter = channels.find(name); iter != channels.end()) + { + auto [first_ch, second_ch] = iter->second; + first_ch->close(); + second_ch->close(); + channels.erase(iter); + } + } + LOG_FMT_DEBUG(getLogger(), "Disabled: {}", name); +} + +std::pair SyncPointCtl::mustGetChannel(const char * name) +{ + std::unique_lock lock(mu); + if (auto iter = channels.find(name); iter == channels.end()) + { + throw Exception(fmt::format("SyncPoint {} is not enabled", name)); + } + else + { + return iter->second; + } +} + +void SyncPointCtl::waitAndPause(const char * name) +{ + auto ch = mustGetChannel(name).first; + LOG_FMT_DEBUG(getLogger(), "waitAndPause({}) waiting...", name); + auto result = ch->recv(); + LOG_FMT_DEBUG(getLogger(), "waitAndPause({}) {}", name, result ? "finished" : "cancelled"); +} + +void SyncPointCtl::next(const char * name) +{ + auto ch = mustGetChannel(name).second; + LOG_FMT_DEBUG(getLogger(), "next({}) trying...", name); + auto result = ch->send(); + LOG_FMT_DEBUG(getLogger(), "next({}) {}", name, result ? "done" : "cancelled"); +} + +void SyncPointCtl::sync(const char * name) +{ + auto [ch_1, ch_2] = mustGetChannel(name); + // Print a stack, which is helpful to know where undesired SYNC_FOR comes from. + LOG_FMT_DEBUG(getLogger(), "SYNC_FOR({}) trying... \n\n# Current Stack: {}", name, StackTrace().toString()); + auto result = ch_1->send(); + LOG_FMT_DEBUG(getLogger(), "SYNC_FOR({}) {}", name, // + result ? "matched waitAndPause(), paused until calling next()..." : "cancelled"); + if (!result) + return; + result = ch_2->recv(); + LOG_FMT_DEBUG(getLogger(), "SYNC_FOR({}) {}", name, result ? 
"done" : "cancelled"); +} + +#else + +void SyncPointCtl::enable(const char *) +{} + +void SyncPointCtl::disable(const char *) {} + +void SyncPointCtl::waitAndPause(const char *) {} + +void SyncPointCtl::next(const char *) {} + +void SyncPointCtl::sync(const char *) {} + +#endif + +SyncPointScopeGuard SyncPointCtl::enableInScope(const char * name) +{ + return SyncPointScopeGuard(name); +} + +} // namespace DB diff --git a/dbms/src/Common/SyncPoint/Ctl.h b/dbms/src/Common/SyncPoint/Ctl.h new file mode 100644 index 00000000000..594da3c102b --- /dev/null +++ b/dbms/src/Common/SyncPoint/Ctl.h @@ -0,0 +1,94 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include +#include +#include + +namespace DB +{ + +class SyncPointCtl +{ +public: + /** + * Enable the sync point. After enabling, when executed to the sync point defined with `SYNC_FOR()`, + * the execution will be suspended, until `waitAndPause()` or `waitAndNext()` is called + * somewhere (e.g. in tests). + */ + static void enable(const char * name); + + /** + * Disable the sync point. Existing suspends will be continued. + */ + static void disable(const char * name); + + /** + * Suspend the execution, until `waitAndPause()`, `next()` or `waitAndNext()` is called somewhere. + * You should not invoke this function directly. Invoke `SYNC_FOR()` instead. + */ + static void sync(const char * name); + + /** + * Wait for the sync point being executed. The code at the sync point will keep + * pausing until you call `next()`. + */ + static void waitAndPause(const char * name); + + /** + * Continue the execution after the specified sync point. + * You must first `waitAndPause()` for it, then `next()` it. + */ + static void next(const char * name); + + /** + * Wait for the sync point being executed. After that, continue the execution after the sync point. + */ + static void waitAndNext(const char * name) + { + waitAndPause(name); + next(name); + } + + /** + * Enable the sync point in the current scope. When scope exits, the sync point will be disabled. + * + * After enabling, when executed to the sync point defined with `SYNC_FOR()`, the execution + * will be suspended, until `waitAndPause()` or `waitAndNext()` is called somewhere (e.g. in tests). + */ + static SyncPointScopeGuard enableInScope(const char * name); + +private: + class SyncChannel; + using SyncChannelPtr = std::shared_ptr; + + static Poco::Logger * getLogger() + { + static Poco::Logger * logger = &Poco::Logger::get("SyncPointCtl"); + return logger; + } + + static std::pair mustGetChannel(const char * name); + + inline static std::unordered_map> + channels{}; + inline static std::mutex mu{}; +}; + +} // namespace DB diff --git a/dbms/src/Common/SyncPoint/ScopeGuard.cpp b/dbms/src/Common/SyncPoint/ScopeGuard.cpp new file mode 100644 index 00000000000..feb174c2c9f --- /dev/null +++ b/dbms/src/Common/SyncPoint/ScopeGuard.cpp @@ -0,0 +1,50 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB +{ + +SyncPointScopeGuard::SyncPointScopeGuard(const char * name_) + : name(name_) +{ + SyncPointCtl::enable(name_); +} + +void SyncPointScopeGuard::disable() +{ + if (disabled) + return; + SyncPointCtl::disable(name.c_str()); + disabled = true; +} + +void SyncPointScopeGuard::waitAndPause() +{ + SyncPointCtl::waitAndPause(name.c_str()); +} + +void SyncPointScopeGuard::next() +{ + SyncPointCtl::next(name.c_str()); +} + +void SyncPointScopeGuard::waitAndNext() +{ + SyncPointCtl::waitAndNext(name.c_str()); +} + +} // namespace DB diff --git a/dbms/src/Common/SyncPoint/ScopeGuard.h b/dbms/src/Common/SyncPoint/ScopeGuard.h new file mode 100644 index 00000000000..c070365d380 --- /dev/null +++ b/dbms/src/Common/SyncPoint/ScopeGuard.h @@ -0,0 +1,60 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace DB +{ + +class SyncPointScopeGuard +{ +public: + explicit SyncPointScopeGuard(const char * name_); + + ~SyncPointScopeGuard() + { + disable(); + } + + /** + * Disable this sync point beforehand, instead of at the moment when + * this scope guard is destructed. + */ + void disable(); + + /** + * Wait for the sync point being executed. The code at the sync point will keep + * pausing until you call `next()`. + */ + void waitAndPause(); + + /** + * Continue the execution after the specified sync point. + * You must first `waitAndPause()` for it, then `next()` it. + */ + void next(); + + /** + * Wait for the sync point being executed. After that, continue the execution after the sync point. + */ + void waitAndNext(); + +private: + std::string name; + bool disabled = false; +}; + +} // namespace DB diff --git a/dbms/src/Common/SyncPoint/SyncChannel.h b/dbms/src/Common/SyncPoint/SyncChannel.h new file mode 100644 index 00000000000..dc4e2cce145 --- /dev/null +++ b/dbms/src/Common/SyncPoint/SyncChannel.h @@ -0,0 +1,117 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include + +namespace DB +{ + +class SyncPointCtl::SyncChannel +{ +public: + /** + * Copy and move are disallowed. A single SyncChannel instance can be shared for multiple threads. + */ + DISALLOW_COPY_AND_MOVE(SyncChannel); + + explicit SyncChannel() = default; + + ~SyncChannel() + { + close(); + // It is possible that there are `recv()` or `send()` running or blocked. + // They should exit when receiving the close signal from `cv`. + // Let's simply wait them to finish. This ensures that memory is always released after + // no existing function is running anymore. + while (pending_op > 0) {} + } + + void close() + { + pending_op++; + { + std::lock_guard lock_cv(m_cv); + is_closing = true; + cv.notify_all(); + } + pending_op--; + } + + /** + * Blocked until one send() is called, or channel is closed. + */ + bool recv() + { + pending_op++; + // wrap a scope for locks to ensure no more access to the member after pending_op-- + auto is_wait_fulfilled = [this]() { + std::unique_lock lock_recv(m_recv); + std::unique_lock lock_cv(m_cv); + has_receiver = true; + cv.notify_all(); + cv.wait(lock_cv, [this] { + return has_data || is_closing; + }); + if (is_closing) + return false; + has_data = false; // consumes one data + has_receiver = false; + return true; + }(); + pending_op--; + return is_wait_fulfilled; + } + + /** + * Blocked until there is a receiver, or channel is closed. + * Queued if multiple send() is called concurrently. + */ + bool send() + { + pending_op++; + auto is_wait_fulfilled = [this]() { + std::unique_lock lock_send(m_send); + std::unique_lock lock_cv(m_cv); + cv.wait(lock_cv, [this] { + return (has_receiver && !has_data) || is_closing; + }); + if (is_closing) + return false; + has_data = true; + cv.notify_all(); + return true; + }(); + pending_op--; + return is_wait_fulfilled; + } + +private: + bool has_receiver = false; + bool has_data = false; + bool is_closing = false; + + std::atomic pending_op = 0; + + std::mutex m_send; + std::mutex m_recv; + std::mutex m_cv; + std::condition_variable cv; +}; + +} // namespace DB diff --git a/dbms/src/Common/SyncPoint/SyncPoint.h b/dbms/src/Common/SyncPoint/SyncPoint.h new file mode 100644 index 00000000000..6ebe3b301e3 --- /dev/null +++ b/dbms/src/Common/SyncPoint/SyncPoint.h @@ -0,0 +1,36 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// Expose publicly +#include +#include +// ======= + +#include + +namespace DB +{ + +/** + * Suspend the execution (when enabled), until `SyncPointCtl::waitAndPause()`, + * `SyncPointCtl::next()` or `SyncPointCtl::waitAndNext()` is called somewhere + * (e.g. in tests). + * + * Usually this is invoked in actual business logics. 
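+ *
+ * A usage sketch (the sync point name and `runFoo()` below are illustrative,
+ * not names defined in this patch):
+ *
+ *   // in the business logic:
+ *   SYNC_FOR("before_foo_commit");
+ *
+ *   // in a test:
+ *   auto sp = SyncPointCtl::enableInScope("before_foo_commit");
+ *   auto th = std::async([&] { runFoo(); }); // suspends at the sync point
+ *   sp.waitAndPause(); // returns once runFoo() is suspended
+ *   // ... inspect intermediate state here ...
+ *   sp.next();         // let runFoo() resume
+ *   th.wait();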
+ */ +#define SYNC_FOR(name) fiu_do_on(name, SyncPointCtl::sync(name);) + +} // namespace DB diff --git a/dbms/src/Flash/Management/ManualCompact.cpp b/dbms/src/Flash/Management/ManualCompact.cpp index 54373fe4c79..2143be88cc3 100644 --- a/dbms/src/Flash/Management/ManualCompact.cpp +++ b/dbms/src/Flash/Management/ManualCompact.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include @@ -25,10 +24,6 @@ namespace DB { -namespace FailPoints -{ -extern const char pause_before_server_merge_one_delta[]; -} // namespace FailPoints namespace Management { @@ -172,7 +167,6 @@ grpc::Status ManualCompactManager::doWork(const ::kvrpcpb::CompactRequest * requ // Repeatedly merge multiple segments as much as possible. while (true) { - FAIL_POINT_PAUSE(FailPoints::pause_before_server_merge_one_delta); auto compacted_range = dm_storage->mergeDeltaBySegment(global_context, start_key, DM::DeltaMergeStore::TaskRunThread::ForegroundRPC); if (compacted_range == std::nullopt) diff --git a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp index 1e9da93ffe3..517e536f2be 100644 --- a/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp +++ b/dbms/src/Flash/Management/tests/gtest_manual_compact.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include #include @@ -31,10 +31,6 @@ namespace DB { -namespace FailPoints -{ -extern const char pause_before_server_merge_one_delta[]; -} // namespace FailPoints namespace tests { @@ -336,18 +332,12 @@ CATCH TEST_P(BasicManualCompactTest, DuplicatedLogicalId) try { - using namespace std::chrono_literals; - - FailPointHelper::enableFailPoint(FailPoints::pause_before_server_merge_one_delta); - - auto thread_1_is_ready = std::promise(); - std::thread t_req1([&]() { - // req1 + auto sp_req1_merge_delta = SyncPointCtl::enableInScope("before_DeltaMergeStore::mergeDeltaBySegment"); + auto req1 = std::async([&]() { auto request = ::kvrpcpb::CompactRequest(); request.set_physical_table_id(TABLE_ID); request.set_logical_table_id(2); auto response = ::kvrpcpb::CompactResponse(); - thread_1_is_ready.set_value(); auto status_code = manager->handleRequest(&request, &response); ASSERT_EQ(status_code.error_code(), grpc::StatusCode::OK); ASSERT_FALSE(response.has_error()); @@ -356,13 +346,12 @@ try helper->verifyExpectedRowsForAllSegments(); }); - { - // send req1, wait request being processed. - thread_1_is_ready.get_future().wait(); - std::this_thread::sleep_for(500ms); // TODO: Maybe better to use sync_channel to avoid hardcoded wait duration. + sp_req1_merge_delta.waitAndPause(); - // req2: Now let's send another request with the same logical id. - // Although worker pool size is 1, this request will be returned immediately, but with an error. + // req2: Another request with the same logical id. + // Although worker pool size is 1, this request will be returned immediately with an error, + // because there is already same logic id working in progress. + { auto request = ::kvrpcpb::CompactRequest(); request.set_physical_table_id(TABLE_ID); request.set_logical_table_id(2); @@ -374,9 +363,9 @@ try helper->verifyExpectedRowsForAllSegments(); } - // Now let's continue req1's work - FailPointHelper::disableFailPoint(FailPoints::pause_before_server_merge_one_delta); - t_req1.join(); + // Proceed the execution of req1. 
Everything should work normally. + sp_req1_merge_delta.next(); + req1.wait(); } CATCH diff --git a/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp b/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp index 53c2e901a5a..373efa10445 100644 --- a/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp +++ b/dbms/src/Storages/DeltaMerge/Delta/ColumnFileFlushTask.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -61,6 +62,8 @@ DeltaIndex::Updates ColumnFileFlushTask::prepare(WriteBatches & wbs) bool ColumnFileFlushTask::commit(ColumnFilePersistedSetPtr & persisted_file_set, WriteBatches & wbs) { + SYNC_FOR("before_ColumnFileFlushTask::commit"); + if (!persisted_file_set->checkAndIncreaseFlushVersion(flush_version)) return false; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index a0d476f774a..8eaa5e4effe 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1047,6 +1048,8 @@ void DeltaMergeStore::mergeDeltaAll(const Context & context) std::optional DeltaMergeStore::mergeDeltaBySegment(const Context & context, const RowKeyValue & start_key, const TaskRunThread run_thread) { + SYNC_FOR("before_DeltaMergeStore::mergeDeltaBySegment"); + updateGCSafePoint(); auto dm_context = newDMContext(context, context.getSettingsRef(), /*tracing_id*/ fmt::format("mergeDeltaBySegment_{}", latest_gc_safe_point.load(std::memory_order_relaxed))); @@ -1082,6 +1085,8 @@ std::optional DeltaMergeStore::mergeDeltaBySegment(const Contex } // else: sleep and retry } // else: sleep and retry + SYNC_FOR("before_DeltaMergeStore::mergeDeltaBySegment|retry_segment"); + // Typical cases: // #1. flushCache failed // - The segment is abandoned (due to segment updated) diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 91d65b7fee2..a87b81b7e0d 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -894,6 +895,8 @@ std::optional Segment::prepareSplit(DMContext & dm_context, const SegmentSnapshotPtr & segment_snap, WriteBatches & wbs) const { + SYNC_FOR("before_Segment::prepareSplit"); + if (!dm_context.enable_logical_split // || segment_snap->stable->getPacks() <= 3 // || segment_snap->delta->getRows() > segment_snap->stable->getRows()) diff --git a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h index 787a521ded3..100f27912e6 100644 --- a/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h +++ b/dbms/src/Storages/DeltaMerge/tests/MultiSegmentTestUtil.h @@ -70,13 +70,33 @@ class MultiSegmentTestUtil : private boost::noncopyable FailPointHelper::disableFailPoint(FailPoints::skip_check_segment_update); } + void resetExpectedRows() + { + auto * log = &Poco::Logger::get(tracing_id); + + rows_by_segments.clear(); + expected_stable_rows.clear(); + expected_delta_rows.clear(); + + std::shared_lock lock(store->read_write_mutex); + auto segment_idx = 0; + for (auto & [_key, seg] : store->segments) + { + UNUSED(_key); + LOG_FMT_INFO(log, "Segment #{}: Range = {}", segment_idx, seg->getRowKeyRange().toDebugString()); + rows_by_segments[segment_idx] = seg->getStable()->getRows(); + expected_stable_rows[segment_idx] = seg->getStable()->getRows(); + expected_delta_rows[segment_idx] = seg->getDelta()->getRows(); + segment_idx++; + } + } + /// Prepare segments * 4. The rows of each segment will be roughly close to n_avg_rows_per_segment. /// The exact rows will be recorded in rows_by_segments. void prepareSegments(DeltaMergeStorePtr store_, size_t n_avg_rows_per_segment, DMTestEnv::PkType pk_type) { store = store_; - auto * log = &Poco::Logger::get(tracing_id); auto dm_context = store->newDMContext(db_context, db_context.getSettingsRef(), /*tracing_id*/ tracing_id); { // Write [0, 4*N) data with tso=2. @@ -105,19 +125,16 @@ class MultiSegmentTestUtil : private boost::noncopyable { std::shared_lock lock(store->read_write_mutex); // Now we have 4 segments. + resetExpectedRows(); + ASSERT_EQ(rows_by_segments.size(), 4); + + // Verify our expectations. auto total_stable_rows = 0; - auto segment_idx = 0; - for (auto & [_key, seg] : store->segments) + for (size_t i = 0; i < rows_by_segments.size(); i++) { - (void)_key; - LOG_FMT_INFO(log, "Segment #{}: Range = {}", segment_idx, seg->getRowKeyRange().toDebugString()); - ASSERT_EQ(seg->getDelta()->getRows(), 0); - ASSERT_GT(seg->getStable()->getRows(), 0); // We don't check the exact rows of each segment. - total_stable_rows += seg->getStable()->getRows(); - rows_by_segments[segment_idx] = seg->getStable()->getRows(); - expected_stable_rows[segment_idx] = seg->getStable()->getRows(); - expected_delta_rows[segment_idx] = seg->getDelta()->getRows(); // = 0 - segment_idx++; + ASSERT_EQ(expected_delta_rows[i], 0); + ASSERT_GT(expected_stable_rows[i], 0); // We don't check the exact rows of each segment. 
+ total_stable_rows += expected_stable_rows[i]; } ASSERT_EQ(total_stable_rows, 4 * n_avg_rows_per_segment); } @@ -145,7 +162,7 @@ class MultiSegmentTestUtil : private boost::noncopyable void verifyExpectedRowsForAllSegments() { std::shared_lock lock(store->read_write_mutex); - ASSERT_EQ(store->segments.size(), 4); + ASSERT_EQ(store->segments.size(), expected_delta_rows.size()); auto segment_idx = 0; for (auto & [_key, seg] : store->segments) { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 89fc676dfee..19b9e73f4e7 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -14,10 +14,13 @@ #include #include +#include #include #include #include +#include + namespace DB { namespace FailPoints @@ -3744,6 +3747,123 @@ try CATCH +// There is another flush cache executing for the same segment. +TEST_P(DeltaMergeStoreMergeDeltaBySegmentTest, RetryByFlushCache) +try +{ + { + // Write new data to segment[1] without flush. + auto newly_written_rows = helper->rows_by_segments[1]; + Block block = DMTestEnv::prepareSimpleWriteBlock(helper->rows_by_segments[0], helper->rows_by_segments[0] + newly_written_rows, false, pk_type, 10 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->verifyExpectedRowsForAllSegments(); + } + + auto sp_flush_commit = SyncPointCtl::enableInScope("before_ColumnFileFlushTask::commit"); + auto sp_merge_delta_retry = SyncPointCtl::enableInScope("before_DeltaMergeStore::mergeDeltaBySegment|retry_segment"); + + // Start a flush and suspend it before flushCommit. + auto th_flush = std::async([&]() { + auto dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), "test"); + auto segment1 = std::next(store->segments.begin())->second; + auto result = segment1->flushCache(*dm_context); + ASSERT_TRUE(result); + ASSERT_EQ(segment1->getDelta()->getUnsavedRows(), 0); + // There should be still rows in the delta layer. + ASSERT_GT(segment1->getDelta()->getRows(), 0); + helper->verifyExpectedRowsForAllSegments(); + }); + sp_flush_commit.waitAndPause(); + + // Start a mergeDelta. It should hit retry immediately due to a flush is in progress. + auto th_merge_delta = std::async([&]() { + auto segment1 = std::next(store->segments.begin())->second; + auto result = store->mergeDeltaBySegment(*db_context, segment1->getRowKeyRange().start, DeltaMergeStore::TaskRunThread::Foreground); + ASSERT_NE(result, std::nullopt); + // All rows in the delta layer should be merged into the stable layer. + helper->expected_stable_rows[1] += helper->expected_delta_rows[1]; + helper->expected_delta_rows[1] = 0; + helper->verifyExpectedRowsForAllSegments(); + }); + sp_merge_delta_retry.waitAndPause(); + + // Let's finish the flush. + sp_flush_commit.next(); + th_flush.wait(); + + // Proceed the mergeDelta retry. Retry should succeed without triggering any new flush. + sp_merge_delta_retry.next(); + th_merge_delta.wait(); +} +CATCH + + +// The segment is splitted during the execution. 
+TEST_P(DeltaMergeStoreMergeDeltaBySegmentTest, RetryBySplit) +try +{ + auto sp_split_prepare = SyncPointCtl::enableInScope("before_Segment::prepareSplit"); + auto sp_merge_delta_retry = SyncPointCtl::enableInScope("before_DeltaMergeStore::mergeDeltaBySegment|retry_segment"); + + // Start a split and suspend it during prepareSplit to simulate a long-running split. + auto th_split = std::async([&] { + auto old_rows_by_segments = helper->rows_by_segments; + ASSERT_EQ(4, old_rows_by_segments.size()); + + // Split segment1 into 2. + auto dm_context = store->newDMContext(*db_context, db_context->getSettingsRef(), "test"); + auto segment1 = std::next(store->segments.begin())->second; + auto result = store->segmentSplit(*dm_context, segment1, /*is_foreground*/ true); + ASSERT_NE(result.second, nullptr); + + helper->resetExpectedRows(); + ASSERT_EQ(5, helper->rows_by_segments.size()); + ASSERT_EQ(old_rows_by_segments[0], helper->rows_by_segments[0]); + ASSERT_EQ(old_rows_by_segments[1], helper->rows_by_segments[1] + helper->rows_by_segments[2]); + ASSERT_EQ(old_rows_by_segments[2], helper->rows_by_segments[3]); + ASSERT_EQ(old_rows_by_segments[3], helper->rows_by_segments[4]); + }); + sp_split_prepare.waitAndPause(); + + // Start a mergeDelta. As there is a split in progress, we would expect several retries. + auto th_merge_delta = std::async([&] { + // mergeDeltaBySegment for segment1 + auto segment1 = std::next(store->segments.begin())->second; + auto result = store->mergeDeltaBySegment(*db_context, segment1->getRowKeyRange().start, DeltaMergeStore::TaskRunThread::Foreground); + ASSERT_NE(result, std::nullopt); + + // Although original segment1 has been split into 2, we still expect only segment1's delta + // was merged. + ASSERT_EQ(5, helper->rows_by_segments.size()); + helper->expected_stable_rows[1] += helper->expected_delta_rows[1]; + helper->expected_delta_rows[1] = 0; + helper->verifyExpectedRowsForAllSegments(); + }); + sp_merge_delta_retry.waitAndNext(); + sp_merge_delta_retry.waitAndNext(); + sp_merge_delta_retry.waitAndPause(); + + // Proceed and finish the split. + sp_split_prepare.next(); + th_split.wait(); + { + // Write to the new segment1 + segment2 after split. + auto newly_written_rows = helper->rows_by_segments[1] + helper->rows_by_segments[2]; + Block block = DMTestEnv::prepareSimpleWriteBlock(helper->rows_by_segments[0], helper->rows_by_segments[0] + newly_written_rows, false, pk_type, 10 /* new tso */); + store->write(*db_context, db_context->getSettingsRef(), block); + helper->expected_delta_rows[1] += helper->rows_by_segments[1]; + helper->expected_delta_rows[2] += helper->rows_by_segments[2]; + helper->verifyExpectedRowsForAllSegments(); + } + + // This time the retry should succeed without any future retries. 
+ sp_merge_delta_retry.next(); + th_merge_delta.wait(); +} +CATCH + + } // namespace tests } // namespace DM } // namespace DB diff --git a/dbms/src/TestUtils/TiFlashTestBasic.h b/dbms/src/TestUtils/TiFlashTestBasic.h index b22c2ddcf96..91c2cc1d061 100644 --- a/dbms/src/TestUtils/TiFlashTestBasic.h +++ b/dbms/src/TestUtils/TiFlashTestBasic.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -50,23 +51,26 @@ namespace DB { namespace tests { -#define CATCH \ - catch (const DB::tests::TiFlashTestException & e) \ - { \ - std::string text = e.displayText(); \ - text += "\n\n"; \ - if (text.find("Stack trace") == std::string::npos) \ - text += fmt::format("Stack trace:\n{}\n", e.getStackTrace().toString()); \ - FAIL() << text; \ - } \ - catch (const DB::Exception & e) \ - { \ - std::string text = e.displayText(); \ - fmt::print(stderr, "Code: {}. {}\n\n", e.code(), text); \ - auto embedded_stack_trace_pos = text.find("Stack trace"); \ - if (std::string::npos == embedded_stack_trace_pos) \ - fmt::print(stderr, "Stack trace:\n{}\n", e.getStackTrace().toString()); \ - throw; \ +#define CATCH \ + catch (const ::DB::tests::TiFlashTestException & e) \ + { \ + std::string text = e.displayText(); \ + text += "\n\n"; \ + if (text.find("Stack trace") == std::string::npos) \ + text += fmt::format("Stack trace:\n{}\n", e.getStackTrace().toString()); \ + FAIL() << text; \ + } \ + catch (const ::DB::Exception & e) \ + { \ + std::string text = fmt::format("Code: {}. {}\n\n", e.code(), e.displayText()); \ + if (text.find("Stack trace") == std::string::npos) \ + text += fmt::format("Stack trace:\n{}\n", e.getStackTrace().toString()); \ + FAIL() << text; \ + } \ + catch (...) \ + { \ + ::DB::tryLogCurrentException(__PRETTY_FUNCTION__); \ + FAIL(); \ } /// helper functions for comparing DataType diff --git a/dbms/src/TestUtils/gtests_dbms_main.cpp b/dbms/src/TestUtils/gtests_dbms_main.cpp index 9c53ccb9084..84c32ea40d3 100644 --- a/dbms/src/TestUtils/gtests_dbms_main.cpp +++ b/dbms/src/TestUtils/gtests_dbms_main.cpp @@ -17,15 +17,47 @@ #include #include #include +#include +#include namespace DB::FailPoints { extern const char force_set_dtfile_exist_when_acquire_id[]; } // namespace DB::FailPoints +void fault_signal_handler(int signum) +{ + ::signal(signum, SIG_DFL); + std::cerr << "Received signal " << strsignal(signum) << std::endl; + std::cerr << StackTrace().toString() << std::endl; + ::raise(signum); +} + +void install_fault_signal_handlers(std::initializer_list signums) +{ + for (auto signum : signums) + { + ::signal(signum, fault_signal_handler); + } +} + +class ThrowListener : public testing::EmptyTestEventListener +{ + void OnTestPartResult(const testing::TestPartResult & result) override + { + if (result.type() == testing::TestPartResult::kFatalFailure) + { + throw ::testing::AssertionException(result); + } + } +}; + + // TODO: Optmize set-up & tear-down process which may cost more than 2s. It's NOT friendly for gtest_parallel. 
int main(int argc, char ** argv)
 {
+    install_fault_signal_handlers({SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGTERM});
+
     DB::tests::TiFlashTestEnv::setupLogger();
     DB::tests::TiFlashTestEnv::initializeGlobalContext();
     DB::ServerInfo server_info;
@@ -40,6 +72,8 @@ int main(int argc, char ** argv)
 #endif
     ::testing::InitGoogleTest(&argc, argv);
+    ::testing::UnitTest::GetInstance()->listeners().Append(new ThrowListener);
+
     auto ret = RUN_ALL_TESTS();
     DB::tests::TiFlashTestEnv::shutdown();

From bcb029eaa1e0bc616b279dfc37232e823f987d76 Mon Sep 17 00:00:00 2001
From: jinhelin
Date: Thu, 28 Jul 2022 12:27:11 +0800
Subject: [PATCH 09/17] Design doc: DeltaTree Read Thread Pool and Data Sharing (#5478)

ref pingcap/tiflash#5488
---
 ...07-25-read-thread-pool-and-data-sharing.md | 91 ++++++++++++++++++
 ...7-25-read-thread-pool-and-data-sharing.png | Bin 0 -> 42573 bytes
 2 files changed, 91 insertions(+)
 create mode 100644 docs/design/2022-07-25-read-thread-pool-and-data-sharing.md
 create mode 100644 docs/design/images/2022-07-25-read-thread-pool-and-data-sharing.png

diff --git a/docs/design/2022-07-25-read-thread-pool-and-data-sharing.md b/docs/design/2022-07-25-read-thread-pool-and-data-sharing.md
new file mode 100644
index 00000000000..2cf57f8283b
--- /dev/null
+++ b/docs/design/2022-07-25-read-thread-pool-and-data-sharing.md
@@ -0,0 +1,91 @@
+# DeltaTree Read Thread Pool and Data Sharing
+
+- Author: [Jinhe Lin](https://github.com/JinheLin)
+
+## Table of Contents
+
+* [Introduction](#introduction)
+* [Motivation or Background](#motivation-or-background)
+* [Detailed Design](#detailed-design)
+* [Impacts & Risks](#impacts--risks)
+
+## Introduction
+
+DeltaTree is the columnar storage engine of TiFlash.
+
+The **Read Thread Pool** is a new DeltaTree component that is responsible for reading data and for decoupling the computing layer from the storage layer.
+
+At the same time, to improve the performance of concurrent table scans, the storage-layer threads schedule and try to combine data read requests according to the characteristics of analytical workloads, so that concurrent table scans on the same table can share data read operations and avoid duplicated work. We call this feature **Data Sharing**.
+
+## Motivation or Background
+
+### Read Thread Model
+
+The read thread model of DeltaTree is inherited from ClickHouse and is relatively simple and straightforward:
+
+1. Each request creates a batch of threads for processing, configurable via `profiles.default.max_threads`; the default is the number of physical CPU cores. (Recently, the computing layer added a thread pool to reuse threads and avoid OOT, but the logic of the storage layer has not changed.)
+
+2. Some computations and I/O are done in the same thread, so computation and I/O cannot be parallelized.
+
+The benefit of this threading model is simplicity, but it also causes some problems:
+
+1. At low concurrency, the number of threads is small, so I/O and CPU cannot be fully utilized.
+
+2. At high concurrency, there are too many threads, and resource contention hurts performance.
+
+Therefore, we want to control the concurrency of table scans by adding a read thread pool inside the storage layer, and at the same time schedule and merge read requests to improve the performance of concurrent table scans.
+
+### Data Sharing
+
+DeltaTree currently lacks a mechanism like a data buffer pool to optimize repeated access to the same data.
+Also, most data buffer pools are designed for the hot data of OLTP workloads, and the access pattern of OLAP workloads is quite different from that of OLTP workloads.
+
+By referring to the papers [Cooperative Scans](https://www.vldb.org/conf/2007/papers/research/p723-zukowski.pdf) and [From Cooperative Scans to Predictive Buffer Management](http://vldb.org/pvldb/vol5/p1759_michalswitakowski_vldb2012.pdf), we propose a "Data Sharing" strategy suitable for OLAP workloads to reduce the overhead of concurrent requests repeatedly accessing the same data.
+
+Data Sharing allows concurrent requests to share the data they read, reducing repetitive work such as I/O and decoding.
+
+## Detailed Design
+
+### Overview
+
+From a high-level point of view, reading from DeltaTree consists of two steps:
+
+1. Call `DeltaMergeStore::read` to create multiple `BlockInputStream`s: obtain the `Segment`s to be read and create snapshots.
+
+2. Call `BlockInputStream::read` to read the data, one `Block` object at a time.
+
+The detailed process is shown in the following figure:
+
+![](./images/2022-07-25-read-thread-pool-and-data-sharing.png)
+
+1. When calling `DeltaMergeStore::read` to create multiple `BlockInputStream`s, register the segments to be read with `SegmentReadTaskScheduler`.
+2. The logic of `BlockInputStream` is simple: it just pops `Block` objects from a `Block` queue.
+3. The scheduler thread is responsible for scheduling read tasks on `SegmentReadTaskScheduler`. During scheduling, concurrent reads of the same segment are merged, and read requests are sent to the read thread pool through the queue for processing.
+4. After the read thread pool reads a block from a segment, it pushes the block to the block queue.
+
+### ReadThreadPool
+
+To avoid segments being processed across different NUMA nodes, which hurts performance:
+1. Read threads are grouped by NUMA node and bound to the corresponding CPUs.
+2. Read tasks are hashed by segment id, so a segment is always processed by the same NUMA node.
+
+### SegmentReadTaskScheduler
+
+1. The purpose of `SegmentReadTaskScheduler` is to let different read requests process the same segment at the same time as much as possible, to increase the opportunity of sharing the same data.
+2. When a `BlockInputStream` is created, all relevant segments are registered with `SegmentReadTaskScheduler`, which maintains the read information of all segments in this process.
+3. The basic scheduling logic is:
+   1. Select a read request that needs to be scheduled.
+   2. Select one segment of this read request, and consider merging other read requests that want to read this segment.
+   3. Push the segment to the task queue of the read threads.
+
+### DataSharingCache
+
+The data read from a segment consists of two parts: the Delta layer and the Stable layer. The Delta layer is generally small, so currently only reads of the Stable layer are shared. The Stable layer is stored in `DMFile`s, so data sharing is mainly sharing between different `DMFileReader`s of the same `DMFile`.
+1. Each `DMFileReader` holds a `DataSharingCache` to store data that may be read in the future.
+2. When a `DMFileReader` successfully reads a column's data block, it tries to send the block to the caches of the other `DMFileReader`s. All `DMFileReader`s are indexed by a global object called `DMFileReaderPool`.
+3.
When `DMFileReader` reads, it will first try to read from the cache, and then read from the disk if it cannot read. + +## Impacts & Risks + +Since the scheduling order of segments is not guaranteed, the order of blocks that returned by a `BlockInputStream` is no longer guaranteed in order. This may break some special optimizations of the optimizer in some special queries, such as `SELECT MAX(pk) FROM t` ('pk' is the primary key of the table). + +In order to be compatible with these special queries (in fact, most of TiFlash's queries do not rely on the order of blocks), storage engine will check whether the query plan needs to scan the table sequentially. If the query plan needs to scan the table sequentially, it scans the table in the old way. \ No newline at end of file diff --git a/docs/design/images/2022-07-25-read-thread-pool-and-data-sharing.png b/docs/design/images/2022-07-25-read-thread-pool-and-data-sharing.png new file mode 100644 index 0000000000000000000000000000000000000000..b90a65f632eed3f8a97c353d1e9a3fb99d2a7f3f GIT binary patch literal 42573 zcmeEuhgXwZ^DZS)EQlzg2uLp?HFN|CHKBx_5Tr;+fY3vz76^!fic&?IASk^kO^P6v zqjaQ6FDghcA}Aop-AVBK&bq(5*1dngUF)3Vd-9e&d-m*^XP%jDv>cO_E|C`CKQhwbXQ(E)0yh-osFy(sa$?{Y zn2(PK7L9c@a3c!I>dHeEC7}x7XHf&Vg^{6WmF=a{_|M3PE*% zuASW+TnG-H;1Rcdb;?QL9dMpRQ-p%MnQI`{8)>OeUIFlgmjjpq<*v0W}?+3{3nCg55P8J+%Yz`d;2Ba36y=@($F-DdDAw zZdNk*5NUIiBOzGJ6N%Nwqw!|)NF9H}AWs+R5Hla}v@aBmk@m$a$pl#$yXhHVuy_v( zf|ml^#6=0AfYgT?fMb(9n|mHa&Q6oahYF&^?z4|78V3jhgF%Yh$_<&EL8 z!OGyuO5f8QZ|USDZ)K>Xs^WUyMFU?WeG{UozJmvj>2nWl?|M*XbYT{ffdflD8$p%7<`jpfOIztRF*N+4>mHB z_t(XnSerU~1$m*M3d;IHCOSlnslHsWpSGM1P7V`@Q_y#HgjwKpERFr4P@JEcKNt!x zV}XMzyUF6LlmiWj&QNU^7w-^7@*NLF6S#%7wl@)`;0J?Rxp*R><}MCarbw6rPF~v7 z#S}b)34)pEAoWdP&en4N0hUIF(iWNl@IXI3Fk{W&5N8w?FpiRX4o9Uvx z^}!u|3v(wJutc;1)W=xY+rhwB5f|)<^7pc|v_yk>1)0kmdg6?HT}`1u@(8@Hg9pM* z7Oss}Kv?T&`=OQO6oK{W8U*R0JiPs|o&@koY_L8GEf=ETqossXG9>s~I5`D78OqCP z>npnY`+6%8f)z|W+~urHebJ6?3f4$n8BcGRGcmx&(8N>Q*g-`y|} z?-Zhq*EI6P80h=zSt!bQ>k$kQP&klI&XX7APy{%A(v1Fba4N$3O)uS2-gf4jSw1 zXRfU)ZKb6fVBnzaCL<%`te_|-ZRO|)bu~s4&E1@|{5+K%mEZ(3D@THiZ!l3As_6>e z^mPkx&@nWFI}nVdm1Gc($})K1mNkjwW$_~zC}Djp<@_C>3JS(3880H-1C2D3*HgeL zDd{7T0f-OI`ubuWG(9jO0g3@crx1Tn7c^AQK*uD+3#TLPD{G`E@9p7lWvz`M>iQzBy&V;d z(J}-}cYTbKm7b?UkiMTQ9tC2UEfevm)a5JoHFcgp1Qbc&_ zI9R%vqg~yswcQO7!EUZD9&kB}U`-zvXH!#aJv}oQLxi-EiJ2MH-A&tz>_`b{Cua*N z8c7U75EU)0J$#5BNN@81BRoOR$`pi3O-oOrwuK1{gh5S97b`bk;QciT3Yrc?xHH-N zddMS*`X2fOSIZz{3$TyPoCD3ZjJ0)iLv)o*tno5v4<|pO2Y3)+M$qdSk|g@FGjdO&^M1LS1!vhJp`rq<-8vR1Hmf$ADNc$g#ntPQXh z0ghTarY_p1z;A296-_<76$9LKvtZfHk|%J8#VXw#aC z(uIe!GOgujv-$AL4eH4<;ldGNDhf9b7gT>V(KOB0g&Vp(7QMMw+`IgHV&Yr+m9BP*VKG_*__G;~6It9%(;iQ4vkH)tTdCJ9Zq)YthoLVp?IAGI;F zhJoK`NU=<@TsNGJh$I?_ET0A;mRXkpT!IfAWwwZ?$7s5o9@9F+FAL#~Ri;%x3e{^3 zqr5oDOwuPbWyl(zzxj~*CCZ4buE%KOBTkaAR^;*j-q_R)E9aj$fAb}4m_{433966q z&X6kMJ+>^qA=9_KCxLiew5{jwICZsC-ZD84zT>zBRfX_Q^3ulBGsc9&DK}yd(YD?u zylxj{4GS}dh>EYZST`S}jEw=`QZ`7vdw?nlWqP#tnPa(vJT7S8qRfdZl*R+_UTeIt z6O4uq#+p?u$^^}Zkf+QfrxA8@iXqFF8N$n|K+75bwpwZ!JOsp+1v0#(2UlS_VN*28 zRNa-cWfkWaKhX1L(%_Aw!Rjb6QofRyM02DCTXi8s>%S_{XtGngcztKnck4{s>?Q-{ zxqS__8ikc-6GqN1@zIb@(Iv8fK1tO8icU^2xqof;2rZK!+hrm4nZJYbDvK6Pcb}nS zQ zj!Y(~8D^jykA9aDq?kJCDsxcPCDSMvyzv`Ofyvo8>=*W@ z;pt@t087knhDt%`!2V#3;xnZxfj28LyO>E4Og@#L)sdiqF*TQb{SQT}lQO_| z)D(EKwVDpl@d-2X%}<4w@zPTky~a3fs_EJpK0ZjgFsmakcQz8Kdh89w$ikpNgM{}d znCNIPGT%4_H%Cq7N%6~5Ez=lS=C{+bbP#Lm5LmO3(?89~iw_(^ zB?QjW6aVD`u;~X>MSgCi%!-C22jtV&YI*Ee?KzT*Dzhs&DQS~G{K9)B);TKvK~tQ7 zUf<5vkN$@;P8Ktr8NGJj7K8a1dJ@@{7rdAlD7#XEJn$Xb4@dUepUK*RLZO`(qTx^Lc_MNv)$D_b|zHG9oL(RR%}6RV)QT0;?7Tj1=W0RXg 
zq&`{)2#a|-UoCFZ)q-S(KP#jdH@i5c)tT|Adm6*kC|>)(FEOFEA~6CWO?n3GC$<(N zy9DIhFMG@@Gy^67F1m`*T5@tZlNzvU%MEM z9>P49xRyqgxgR`whyAj9gY#mihrU%>(W2G-*e`C3j@Z0uuXW%f_;s9`bF|fsWaH_B zuiDr)*+0zc;Afxyxukri)p$)1pF%7bxj zt_tW(h{8NN(o~Usg~!q&Z(k=OX5^~HJoE6mNbm9GX`&h6Y#1;Ecb6o|pdIAw%S6O0 zVvO`}^7eV0a+Pi^?80uD$oilq5lfnTRRKoNUJdQL3j($Yz{gzyZde`&rrcsw&+EFT zlJJ*hvpfYzE(jl$D-}gKUlwER+QgkdK0|@Qz}XY9NHhwoQ{4Mgv&?c{iAQa@J|Wdy zx(hk_Adboqo&l@US z7viFFYxJ1S(zCYMN$0L!aq}Y$4bhvf{m0GM%D9lWMaKZP%vH?}Odi~tDb=9R&?|u6 zw*jeUpY5K<D#F)}#LmBp4JrQ?T{=EOnCAE Date: Fri, 29 Jul 2022 01:05:12 -0400 Subject: [PATCH 10/17] *: Simplify join executor translation (#5453) ref pingcap/tiflash#5351 --- dbms/src/Debug/astToExecutor.cpp | 176 ++++--- dbms/src/Debug/astToExecutor.h | 29 +- .../Flash/Coprocessor/JoinInterpreterHelper.h | 2 +- .../Coprocessor/collectOutputFieldTypes.cpp | 19 +- dbms/src/Flash/tests/gtest_collation.cpp | 2 +- dbms/src/Flash/tests/gtest_interpreter.cpp | 24 +- dbms/src/Flash/tests/gtest_join_executor.cpp | 448 +++++++++--------- dbms/src/Flash/tests/gtest_split_tasks.cpp | 4 +- dbms/src/TestUtils/mockExecutor.cpp | 18 +- dbms/src/TestUtils/mockExecutor.h | 2 +- .../TestUtils/tests/gtest_mock_executors.cpp | 4 +- 11 files changed, 412 insertions(+), 316 deletions(-) diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index beddd18c37b..5176ce6939c 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -797,19 +797,20 @@ std::pair splitQualifiedName(const String & s) { std::pair ret; Poco::StringTokenizer string_tokens(s, "."); - if (string_tokens.count() == 1) + + switch (string_tokens.count()) { + case 1: ret.second = s; - } - else if (string_tokens.count() == 2) - { + break; + case 2: ret.first = string_tokens[0]; ret.second = string_tokens[1]; + break; + default: + throw Exception("Invalid identifier name " + s); } - else - { - throw Exception("Invalid identifier name"); - } + return ret; } @@ -891,6 +892,7 @@ void TableScan::columnPrune(std::unordered_set & used_columns) output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), output_schema.end()); } + bool TableScan::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) { if (table_info.is_partition_table) @@ -929,6 +931,7 @@ bool Selection::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_ auto * child_executor = sel->mutable_child(); return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } + void Selection::columnPrune(std::unordered_set & used_columns) { for (auto & expr : conditions) @@ -957,6 +960,7 @@ bool TopN::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, c auto * child_executor = topn->mutable_child(); return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } + void TopN::columnPrune(std::unordered_set & used_columns) { for (auto & expr : order_columns) @@ -975,6 +979,7 @@ bool Limit::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, auto * child_executor = lt->mutable_child(); return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } + void Limit::columnPrune(std::unordered_set & used_columns) { children[0]->columnPrune(used_columns); @@ -1049,6 +1054,7 @@ bool Aggregation::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collato auto * child_executor = agg->mutable_child(); return 
children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } + void Aggregation::columnPrune(std::unordered_set & used_columns) { /// output schema for partial agg is the original agg's output schema @@ -1075,6 +1081,7 @@ void Aggregation::columnPrune(std::unordered_set & used_columns) } children[0]->columnPrune(used_input_columns); } + void Aggregation::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) { if (!is_final_mode) @@ -1155,6 +1162,7 @@ bool Project::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id auto * children_executor = proj->mutable_child(); return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); } + void Project::columnPrune(std::unordered_set & used_columns) { output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), @@ -1185,6 +1193,7 @@ void Join::columnPrune(std::unordered_set & used_columns) { std::unordered_set left_columns; std::unordered_set right_columns; + for (auto & field : children[0]->output_schema) left_columns.emplace(field.first); for (auto & field : children[1]->output_schema) @@ -1192,6 +1201,7 @@ void Join::columnPrune(std::unordered_set & used_columns) std::unordered_set left_used_columns; std::unordered_set right_used_columns; + for (const auto & s : used_columns) { if (left_columns.find(s) != left_columns.end()) @@ -1199,7 +1209,8 @@ void Join::columnPrune(std::unordered_set & used_columns) else right_used_columns.emplace(s); } - for (const auto & child : join_params.using_expression_list->children) + + for (const auto & child : using_expr_list->children) { if (auto * identifier = typeid_cast(child.get())) { @@ -1226,20 +1237,23 @@ void Join::columnPrune(std::unordered_set & used_columns) throw Exception("Only support Join on columns"); } } + children[0]->columnPrune(left_used_columns); children[1]->columnPrune(right_used_columns); - output_schema.clear(); + /// update output schema + output_schema.clear(); for (auto & field : children[0]->output_schema) { - if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) + if (tp == tipb::TypeRightOuterJoin && field.second.hasNotNullFlag()) output_schema.push_back(toNullableDAGColumnInfo(field)); else output_schema.push_back(field); } + for (auto & field : children[1]->output_schema) { - if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) + if (tp == tipb::TypeLeftOuterJoin && field.second.hasNotNullFlag()) output_schema.push_back(toNullableDAGColumnInfo(field)); else output_schema.push_back(field); @@ -1248,18 +1262,19 @@ void Join::columnPrune(std::unordered_set & used_columns) void Join::fillJoinKeyAndFieldType( ASTPtr key, - const DAGSchema & schema, + const DAGSchema & child_schema, tipb::Expr * tipb_key, tipb::FieldType * tipb_field_type, int32_t collator_id) { auto * identifier = typeid_cast(key.get()); - for (size_t index = 0; index < schema.size(); index++) + for (size_t index = 0; index < child_schema.size(); ++index) { - const auto & field = schema[index]; - if (splitQualifiedName(field.first).second == identifier->getColumnName()) + const auto & [col_name, col_info] = child_schema[index]; + + if (splitQualifiedName(col_name).second == identifier->getColumnName()) { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); + auto tipb_type = TiDB::columnInfoToFieldType(col_info); 
tipb_type.set_collate(collator_id); tipb_key->set_tp(tipb::ColumnRef); @@ -1278,28 +1293,19 @@ bool Join::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, c { tipb_executor->set_tp(tipb::ExecType::TypeJoin); tipb_executor->set_executor_id(name); + tipb::Join * join = tipb_executor->mutable_join(); - switch (join_params.kind) // todo support more type... - { - case ASTTableJoin::Kind::Inner: - join->set_join_type(tipb::JoinType::TypeInnerJoin); - break; - case ASTTableJoin::Kind::Left: - join->set_join_type(tipb::JoinType::TypeLeftOuterJoin); - break; - case ASTTableJoin::Kind::Right: - join->set_join_type(tipb::JoinType::TypeRightOuterJoin); - break; - default: - throw Exception("Unsupported join type"); - } + + join->set_join_type(tp); join->set_join_exec_type(tipb::JoinExecType::TypeHashJoin); join->set_inner_idx(1); - for (auto & key : join_params.using_expression_list->children) + + for (auto & key : using_expr_list->children) { fillJoinKeyAndFieldType(key, children[0]->output_schema, join->add_left_join_keys(), join->add_probe_types(), collator_id); fillJoinKeyAndFieldType(key, children[1]->output_schema, join->add_right_join_keys(), join->add_build_types(), collator_id); } + auto * left_child_executor = join->add_children(); children[0]->toTiPBExecutor(left_child_executor, collator_id, mpp_info, context); auto * right_child_executor = join->add_children(); @@ -1321,29 +1327,27 @@ void Join::toMPPSubPlan(size_t & executor_index, const DAGProperties & propertie exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); return; } + std::vector left_partition_keys; std::vector right_partition_keys; - for (auto & key : join_params.using_expression_list->children) - { - size_t index = 0; - for (; index < children[0]->output_schema.size(); index++) - { - if (splitQualifiedName(children[0]->output_schema[index].first).second == key->getColumnName()) - { - left_partition_keys.push_back(index); - break; - } - } - index = 0; - for (; index < children[1]->output_schema.size(); index++) + + auto push_back_partition_key = [](auto & partition_keys, const auto & child_schema, const auto & key) { + for (size_t index = 0; index < child_schema.size(); ++index) { - if (splitQualifiedName(children[1]->output_schema[index].first).second == key->getColumnName()) + if (splitQualifiedName(child_schema[index].first).second == key->getColumnName()) { - right_partition_keys.push_back(index); + partition_keys.push_back(index); break; } } + }; + + for (auto & key : using_expr_list->children) + { + push_back_partition_key(left_partition_keys, children[0]->output_schema, key); + push_back_partition_key(right_partition_keys, children[1]->output_schema, key); } + std::shared_ptr left_exchange_sender = std::make_shared(executor_index, children[0]->output_schema, tipb::Hash, left_partition_keys); left_exchange_sender->children.push_back(children[0]); @@ -1648,30 +1652,80 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se return project; } -ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params) +static void buildLeftSideJoinSchema(DAGSchema & schema, const DAGSchema & left_schema, tipb::JoinType tp) { - DAGSchema output_schema; - const auto & join_params = (static_cast(*params)); - for (auto & field : left->output_schema) + for (const auto & field : left_schema) { - if (join_params.kind == ASTTableJoin::Kind::Right && field.second.hasNotNullFlag()) - 
output_schema.push_back(toNullableDAGColumnInfo(field)); + if (tp == tipb::JoinType::TypeRightOuterJoin && field.second.hasNotNullFlag()) + schema.push_back(toNullableDAGColumnInfo(field)); else - output_schema.push_back(field); + schema.push_back(field); } - for (auto & field : right->output_schema) - { - if (join_params.kind == ASTTableJoin::Kind::Left && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); +} + +static void buildRightSideJoinSchema(DAGSchema & schema, const DAGSchema & right_schema, tipb::JoinType tp) +{ + /// Note: for semi join, the right table column is ignored + /// but for (anti) left outer semi join, a 1/0 (uint8) field is pushed back + /// indicating whether right table has matching row(s), see comment in ASTTableJoin::Kind for details. + if (tp == tipb::JoinType::TypeLeftOuterSemiJoin || tp == tipb::JoinType::TypeAntiLeftOuterSemiJoin) + { + tipb::FieldType field_type{}; + field_type.set_tp(TiDB::TypeTiny); + field_type.set_charset("binary"); + field_type.set_collate(TiDB::ITiDBCollator::BINARY); + field_type.set_flag(0); + field_type.set_flen(-1); + field_type.set_decimal(-1); + schema.push_back(std::make_pair("", TiDB::fieldTypeToColumnInfo(field_type))); + } + else if (tp != tipb::JoinType::TypeSemiJoin && tp != tipb::JoinType::TypeAntiSemiJoin) + { + for (const auto & field : right_schema) + { + if (tp == tipb::JoinType::TypeLeftOuterJoin && field.second.hasNotNullFlag()) + schema.push_back(toNullableDAGColumnInfo(field)); + else + schema.push_back(field); + } } - auto join = std::make_shared(executor_index, output_schema, params); +} + +ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, tipb::JoinType tp, ASTPtr using_expr_list) +{ + DAGSchema output_schema; + + buildLeftSideJoinSchema(output_schema, left->output_schema, tp); + buildRightSideJoinSchema(output_schema, right->output_schema, tp); + + auto join = std::make_shared(executor_index, output_schema, tp, using_expr_list); join->children.push_back(left); join->children.push_back(right); + return join; } +ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params) +{ + tipb::JoinType tp; + const auto & ast_join = (static_cast(*params)); + switch (ast_join.kind) + { + case ASTTableJoin::Kind::Inner: + tp = tipb::JoinType::TypeInnerJoin; + break; + case ASTTableJoin::Kind::Left: + tp = tipb::JoinType::TypeLeftOuterJoin; + break; + case ASTTableJoin::Kind::Right: + tp = tipb::JoinType::TypeRightOuterJoin; + break; + default: + throw Exception("Unsupported join type"); + } + return compileJoin(executor_index, left, right, tp, ast_join.using_expression_list); +} + ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type) { ExecutorPtr exchange_sender = std::make_shared(executor_index, input->output_schema, exchange_type); diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index ecda26fa6c9..3617950d647 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -252,17 +253,20 @@ struct Project : public Executor struct Join : Executor { - ASTPtr params; - const ASTTableJoin & join_params; - Join(size_t & index_, const DAGSchema & output_schema_, ASTPtr params_) + tipb::JoinType tp; + + const ASTPtr using_expr_list; + + // todo(ljr): support on expr + const ASTPtr on_expr{}; + + 
Join(size_t & index_, const DAGSchema & output_schema_, tipb::JoinType tp_, ASTPtr using_expr_list_) : Executor(index_, "Join_" + std::to_string(index_), output_schema_) , tp(tp_) , using_expr_list(using_expr_list_) { - if (join_params.using_expression_list == nullptr) + if (using_expr_list == nullptr) throw Exception("No join condition found."); - if (join_params.strictness != ASTTableJoin::Strictness::All) - throw Exception("Only support join with strictness ALL"); } void columnPrune(std::unordered_set & used_columns) override; @@ -346,8 +350,17 @@ ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPt ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list); +/// Note: this API is only used by the legacy test framework for compatibility purposes and will be deprecated soon, +/// so please avoid using it. +/// The old executor test framework relies on CH's parser to translate a SQL string into an AST tree, then manually into a DAGRequest. +/// However, as for the join executor, this translation, from ASTTableJoin to tipb::Join, is not a one-to-one mapping +/// because of the different join classification models used by these two structures. Therefore, under the old test framework, +/// it is hard to fully test the join executor. The new framework aims to construct the DAGRequest directly, so new framework APIs for join should +/// avoid using ASTTableJoin. ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params); +ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, tipb::JoinType tp, ASTPtr using_expr_list); + ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count = 0); diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h index d84c03d572d..610a8a54c2d 100644 --- a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h @@ -97,7 +97,7 @@ struct TiFlashJoin /// @other_filter_column_name: column name of `and(other_cond1, other_cond2, ...)` /// @other_eq_filter_from_in_column_name: column name of `and(other_eq_cond1_from_in, other_eq_cond2_from_in, ...)` /// such as - /// `select * from t where col1 in (select col2 from t2 where t1.col2 = t2.col3)` + /// `select * from t1 where col1 in (select col2 from t2 where t1.col2 = t2.col3)` /// - other_filter is `t1.col2 = t2.col3` /// - other_eq_filter_from_in_column is `t1.col1 = t2.col2` /// diff --git a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp index 87744c553e0..86a5edc7406 100644 --- a/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp +++ b/dbms/src/Flash/Coprocessor/collectOutputFieldTypes.cpp @@ -127,7 +127,24 @@ bool collectForJoin(std::vector & output_field_types, const tip output_field_types.push_back(field_type); } } - if (executor.join().join_type() != tipb::JoinType::TypeSemiJoin && executor.join().join_type() != tipb::JoinType::TypeAntiSemiJoin) + + /// Note: for all kinds of semi join, the right table column is ignored + /// but for (anti) left outer semi join, a 1/0 (uint8) field is pushed back + /// indicating whether right table has matching row(s), see comment in ASTTableJoin::Kind for details.
+ if (executor.join().join_type() == tipb::JoinType::TypeLeftOuterSemiJoin || executor.join().join_type() == tipb::JoinType::TypeAntiLeftOuterSemiJoin) + { + /// Note: within DAGRequest tidb doesn't have specific field type info for this column + /// therefore, we just use tinyType and default values to construct a new one as tidb does in `PlanBuilder::buildSemiJoin` + tipb::FieldType field_type{}; + field_type.set_tp(TiDB::TypeTiny); + field_type.set_charset("binary"); + field_type.set_collate(TiDB::ITiDBCollator::BINARY); + field_type.set_flag(0); + field_type.set_flen(-1); + field_type.set_decimal(-1); + output_field_types.push_back(field_type); + } + else if (executor.join().join_type() != tipb::JoinType::TypeSemiJoin && executor.join().join_type() != tipb::JoinType::TypeAntiSemiJoin) { /// for semi/anti semi join, the right table column is ignored for (auto & field_type : children_output_field_types[1]) diff --git a/dbms/src/Flash/tests/gtest_collation.cpp b/dbms/src/Flash/tests/gtest_collation.cpp index abae9782774..a201eb29597 100644 --- a/dbms/src/Flash/tests/gtest_collation.cpp +++ b/dbms/src/Flash/tests/gtest_collation.cpp @@ -325,7 +325,7 @@ try { /// Check collation for executors auto request = context.scan(join_table, "t1") - .join(context.scan(join_table, "t2"), {col("a")}, ASTTableJoin::Kind::Inner) + .join(context.scan(join_table, "t2"), {col("a")}, tipb::JoinType::TypeInnerJoin) .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) .build(context); ASSERT_EQ(checkExecutorCollation(request).size(), 0); diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index 75a0857465e..1afb876c6b3 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -404,11 +404,11 @@ try table2.join( table3.join(table4, {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left) + tipb::JoinType::TypeLeftOuterJoin) .build(context); String expected = R"( @@ -446,11 +446,11 @@ CreatingSets receiver2.join( receiver3.join(receiver4, {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left) + tipb::JoinType::TypeLeftOuterJoin) .build(context); String expected = R"( @@ -488,11 +488,11 @@ CreatingSets receiver2.join( receiver3.join(receiver4, {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left), + tipb::JoinType::TypeLeftOuterJoin), {col("join_c")}, - ASTTableJoin::Kind::Left) + tipb::JoinType::TypeLeftOuterJoin) .exchangeSender(tipb::PassThrough) .build(context); @@ -534,7 +534,7 @@ try auto request = table1.join( table2, {col("join_c")}, - ASTTableJoin::Kind::Left) + tipb::JoinType::TypeLeftOuterJoin) .aggregation({Max(col("r_a"))}, {col("join_c")}) .build(context); String expected = R"( @@ -563,7 +563,7 @@ CreatingSets auto request = table1.join( table2, {col("join_c")}, - ASTTableJoin::Kind::Right) + tipb::JoinType::TypeRightOuterJoin) .aggregation({Max(col("r_a"))}, {col("join_c")}) .build(context); String expected = R"( @@ -595,7 +595,7 @@ CreatingSets auto request = receiver1.join( receiver2, {col("join_c")}, - ASTTableJoin::Kind::Right) + tipb::JoinType::TypeRightOuterJoin) 
.aggregation({Sum(col("r_a"))}, {col("join_c")}) .exchangeSender(tipb::PassThrough) .limit(10) diff --git a/dbms/src/Flash/tests/gtest_join_executor.cpp b/dbms/src/Flash/tests/gtest_join_executor.cpp index 70f059d2183..0b6b33d81ef 100644 --- a/dbms/src/Flash/tests/gtest_join_executor.cpp +++ b/dbms/src/Flash/tests/gtest_join_executor.cpp @@ -14,6 +14,7 @@ #include +#include #include namespace DB @@ -22,7 +23,7 @@ namespace tests { class JoinExecutorTestRunner : public DB::tests::ExecutorTest { - static const size_t max_concurrency_level = 10; + static constexpr size_t max_concurrency_level = 10; public: void initializeContext() override @@ -48,22 +49,6 @@ class JoinExecutorTestRunner : public DB::tests::ExecutorTest {toVec("s", {"banana", "banana"}), toVec("join_c", {"apple", "banana"})}); - context.addMockTable("simple_test", "t1", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "2", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); - - context.addMockTable("simple_test", "t2", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "3", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); - - context.addMockTable("multi_test", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {1, 3, 0}), toVec("b", {2, 2, 0}), toVec("c", {3, 2, 0})}); - - context.addMockTable("multi_test", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {3, 3, 0}), toVec("b", {4, 2, 0}), toVec("c", {5, 3, 0})}); - - context.addMockTable("multi_test", "t3", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 2, 0}), toVec("b", {2, 2, 0})}); - - context.addMockTable("multi_test", "t4", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {3, 2, 0}), toVec("b", {4, 2, 0})}); - - context.addMockTable("join_agg", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 1, 3, 4}), toVec("b", {1, 1, 4, 1})}); - - context.addMockTable("join_agg", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 4, 2}), toVec("b", {2, 6, 2})}); - context.addExchangeReceiver("exchange_r_table", {{"s1", TiDB::TP::TypeString}, {"join_c", TiDB::TP::TypeString}}, {toNullableVec("s", {"banana", "banana"}), @@ -75,11 +60,6 @@ class JoinExecutorTestRunner : public DB::tests::ExecutorTest toNullableVec("join_c", {"apple", "banana"})}); } - std::tuple multiTestScan() - { - return {context.scan("multi_test", "t1"), context.scan("multi_test", "t2"), context.scan("multi_test", "t3"), context.scan("multi_test", "t4")}; - } - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) { for (size_t i = 1; i <= max_concurrency_level; ++i) @@ -87,201 +67,236 @@ class JoinExecutorTestRunner : public DB::tests::ExecutorTest ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); } } -}; - -TEST_F(JoinExecutorTestRunner, SimpleInnerJoin) -try -{ - auto request = context.scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("a")}, ASTTableJoin::Kind::Inner) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, "3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}); - } - request = context.scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Inner) - .build(context); - { - 
executeWithConcurrency(request, {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, "3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}); - } + static constexpr size_t join_type_num = 7; - request = context.scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Inner) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({{}, "1", "2", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "3", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}); - } - - request = context.scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Inner) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({{}, "1", "3", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "2", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}); - } -} -CATCH + static constexpr tipb::JoinType join_types[join_type_num] = { + tipb::JoinType::TypeInnerJoin, + tipb::JoinType::TypeLeftOuterJoin, + tipb::JoinType::TypeRightOuterJoin, + tipb::JoinType::TypeSemiJoin, + tipb::JoinType::TypeAntiSemiJoin, + tipb::JoinType::TypeLeftOuterSemiJoin, + tipb::JoinType::TypeAntiLeftOuterSemiJoin, + }; +}; -TEST_F(JoinExecutorTestRunner, SimpleLeftJoin) +TEST_F(JoinExecutorTestRunner, SimpleJoin) try { - auto request = context.scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("a")}, ASTTableJoin::Kind::Left) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}); - } + constexpr size_t simple_test_num = 4; - request = context.scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Left) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}); - } + context.addMockTable("simple_test", "t1", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "2", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); + context.addMockTable("simple_test", "t2", {{"a", TiDB::TP::TypeString}, {"b", TiDB::TP::TypeString}}, {toNullableVec("a", {"1", "3", {}, "1", {}}), toNullableVec("b", {"3", "4", "3", {}, {}})}); - request = context.scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Left) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); - } + // names of left table, right table and join key column + const std::tuple join_cases[simple_test_num] = { + std::make_tuple("t1", "t2", "a"), + std::make_tuple("t2", "t1", "a"), + std::make_tuple("t1", "t2", "b"), + std::make_tuple("t2", "t1", "b"), + }; - request = context.scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Left) - .build(context); + const ColumnsWithTypeAndName expected_cols[simple_test_num * join_type_num] = { + // inner join + {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, 
"3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}, + {toNullableVec({"1", "1", "1", "1"}), toNullableVec({{}, "3", {}, "3"}), toNullableVec({"1", "1", "1", "1"}), toNullableVec({"3", "3", {}, {}})}, + {toNullableVec({{}, "1", "2", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "3", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}, + {toNullableVec({{}, "1", "3", {}, "1"}), toNullableVec({"3", "3", "4", "3", "3"}), toNullableVec({"1", "1", "2", {}, {}}), toNullableVec({"3", "3", "4", "3", "3"})}, + // left join + {toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}, + {toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}}), toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}})}, + {toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}, + {toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}, + // right join + {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}, + {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}, + {toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}, + {toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}, + // semi join + {toNullableVec({"1", "1"}), toNullableVec({"3", {}})}, + {toNullableVec({"1", "1"}), toNullableVec({"3", {}})}, + {toNullableVec({"1", "2", {}}), toNullableVec({"3", "4", "3"})}, + {toNullableVec({"1", "3", {}}), toNullableVec({"3", "4", "3"})}, + // anti semi join + {toNullableVec({"2", {}, {}}), toNullableVec({"4", "3", {}})}, + {toNullableVec({"3", {}, {}}), toNullableVec({"4", "3", {}})}, + {toNullableVec({"1", {}}), toNullableVec({{}, {}})}, + {toNullableVec({"1", {}}), toNullableVec({{}, {}})}, + // left outer semi join + {toNullableVec({"1", "2", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({1, 0, 0, 1, 0})}, + {toNullableVec({"1", "3", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({1, 0, 0, 1, 0})}, + {toNullableVec({"1", "2", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({1, 1, 1, 0, 0})}, + {toNullableVec({"1", "3", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({1, 1, 1, 0, 0})}, + // anti left outer semi join + {toNullableVec({"1", "2", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({0, 1, 1, 0, 1})}, + {toNullableVec({"1", "3", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({0, 1, 1, 0, 1})}, + {toNullableVec({"1", "2", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), 
toNullableVec({0, 0, 0, 1, 1})}, + {toNullableVec({"1", "3", {}, "1", {}}), toNullableVec({"3", "4", "3", {}, {}}), toNullableVec({0, 0, 0, 1, 1})}, + }; + + for (size_t i = 0; i < join_type_num; ++i) { - executeWithConcurrency(request, {toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + for (size_t j = 0; j < simple_test_num; ++j) + { + const auto & [l, r, k] = join_cases[j]; + auto request = context.scan("simple_test", l) + .join(context.scan("simple_test", r), {col(k)}, join_types[i]) + .build(context); + + { + executeWithConcurrency(request, expected_cols[i * simple_test_num + j]); + } + } } } CATCH -TEST_F(JoinExecutorTestRunner, SimpleRightJoin) +TEST_F(JoinExecutorTestRunner, MultiJoin) try { - auto request = context - .scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("a")}, ASTTableJoin::Kind::Right) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "3", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}); - } - - request = context - .scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("a")}, ASTTableJoin::Kind::Right) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({"1", "1", {}, {}, "1", "1", {}}), toNullableVec({{}, "3", {}, {}, {}, "3", {}}), toNullableVec({"1", "1", "2", {}, "1", "1", {}}), toNullableVec({"3", "3", "4", "3", {}, {}, {}})}); - } - - request = context - .scan("simple_test", "t1") - .join(context.scan("simple_test", "t2"), {col("b")}, ASTTableJoin::Kind::Right) - .build(context); - { - executeWithConcurrency(request, {toNullableVec({{}, "1", "2", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "3", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); - } + context.addMockTable("multi_test", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {1, 3, 0}), toVec("b", {2, 2, 0}), toVec("c", {3, 2, 0})}); + + context.addMockTable("multi_test", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}, {"c", TiDB::TP::TypeLong}}, {toVec("a", {3, 3, 0}), toVec("b", {4, 2, 0}), toVec("c", {5, 3, 0})}); + + context.addMockTable("multi_test", "t3", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 2, 0}), toVec("b", {2, 2, 0})}); + + context.addMockTable("multi_test", "t4", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {3, 2, 0}), toVec("b", {4, 2, 0})}); + + const ColumnsWithTypeAndName expected_cols[join_type_num * join_type_num] = { + /// inner x inner x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}, + /// inner x left x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}, + /// inner x right x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), 
toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}, + /// inner x semi x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0})}, + /// inner x anti semi x inner + {toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// inner x left outer semi x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({1, 1, 1})}, + /// inner x anti left outer semi x inner + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({0, 0, 0})}, + + /// left x inner x left + {toNullableVec({1, 1, 3, 3, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, 3, 3, 3, 3, 0}), toNullableVec({{}, {}, 4, 4, 2, 2, 0}), toNullableVec({{}, {}, 5, 5, 3, 3, 0}), toNullableVec({1, 2, 1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0})}, + /// left x left x left + {toNullableVec({1, 1, 3, 3, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, 3, 3, 3, 3, 0}), toNullableVec({{}, {}, 2, 2, 4, 4, 0}), toNullableVec({{}, {}, 3, 3, 5, 5, 0}), toNullableVec({2, 1, 2, 1, 2, 1, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({2, {}, 2, {}, 2, {}, 0}), toNullableVec({2, {}, 2, {}, 2, {}, 0})}, + /// left x right x left + {toNullableVec({1, 3, 3, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 2, 2, 3, 2, 2, 0}), toNullableVec({{}, 3, 3, {}, 3, 3, 0}), toNullableVec({{}, 4, 2, {}, 4, 2, 0}), toNullableVec({{}, 5, 3, {}, 5, 3, 0}), toNullableVec({1, 1, 1, 2, 2, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0})}, + /// left x semi x left + {toNullableVec({1, 3, 3, 0}), toNullableVec({2, 2, 2, 0}), toNullableVec({3, 2, 2, 0}), toNullableVec({{}, 3, 3, 0}), toNullableVec({{}, 4, 2, 0}), toNullableVec({{}, 5, 3, 0})}, + /// left x anti semi x left + {toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// left x left outer semi x left + {toNullableVec({1, 3, 3, 0}), toNullableVec({2, 2, 2, 0}), toNullableVec({3, 2, 2, 0}), toNullableVec({{}, 3, 3, 0}), toNullableVec({{}, 4, 2, 0}), toNullableVec({{}, 5, 3, 0}), toNullableVec({1, 1, 1, 1})}, + /// left x anti left outer semi x left + {toNullableVec({1, 3, 3, 0}), toNullableVec({2, 2, 2, 0}), toNullableVec({3, 2, 2, 0}), toNullableVec({{}, 3, 3, 0}), toNullableVec({{}, 4, 2, 0}), toNullableVec({{}, 5, 3, 0}), toNullableVec({0, 0, 0, 0})}, + + /// right x inner x right + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}, + /// right x left x right + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), 
toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}, + /// right x right x right + {toNullableVec({{}, 3, 3, 0}), toNullableVec({{}, 2, 2, 0}), toNullableVec({{}, 2, 2, 0}), toNullableVec({{}, 3, 3, 0}), toNullableVec({{}, 4, 2, 0}), toNullableVec({{}, 5, 3, 0}), toNullableVec({{}, 2, 2, 0}), toNullableVec({{}, 2, 2, 0}), toNullableVec({3, 2, 2, 0}), toNullableVec({4, 2, 2, 0})}, + /// right x semi x right + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0})}, + /// right x anti semi x right + {toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// right x left outer semi x right + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({1, 1, 1})}, + /// right x anti left outer semi x right + {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({0, 0, 0})}, + + /// semi x inner x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0})}, + /// semi x left x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0})}, + /// semi x right x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({2, 0})}, + /// semi x semi x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0})}, + /// semi x anti semi x semi + {toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// semi x left outer semi x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({1, 1})}, + /// semi x anti left outer semi x semi + {toNullableVec({3, 0}), toNullableVec({2, 0}), toNullableVec({2, 0}), toNullableVec({0, 0})}, + + /// anti semi x inner x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3}), toNullableVec({1}), toNullableVec({2})}, + /// anti semi x left x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3}), toNullableVec({1}), toNullableVec({2})}, + /// anti semi x right x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3}), toNullableVec({1}), toNullableVec({2})}, + /// anti semi x semi x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3})}, + /// anti semi x anti semi x anti semi + {toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// anti semi x left outer semi x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3}), toNullableVec({1})}, + /// anti semi x left outer anti semi x anti semi + {toNullableVec({1}), toNullableVec({2}), toNullableVec({3}), toNullableVec({0})}, + + /// left outer semi x inner x left outer semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({0, 0, 1, 1, 1}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({0, 1, 0, 1, 1})}, + /// left outer semi x left x left outer semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({0, 0, 1, 1, 1}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({0, 1, 0, 1, 
1})}, + /// left outer semi x right x left outer semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({0, 0, 1, 1, 1}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({0, 1, 0, 1, 1})}, + /// left outer semi x semi x left outer semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({0, 1, 1})}, + /// left outer semi x anti semi x left outer semi + {toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// left outer semi x left outer semi x left outer semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({0, 1, 1}), toNullableVec({1, 1, 1})}, + /// left outer semi x left outer anti semi x left outer semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({0, 1, 1}), toNullableVec({0, 0, 0})}, + + /// left outer anti semi x inner x left outer anti semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({1, 1, 0, 0, 0}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({1, 0, 1, 0, 0})}, + /// left outer anti semi x left x left outer anti semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({1, 1, 0, 0, 0}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({1, 0, 1, 0, 0})}, + /// left outer anti semi x right x left outer anti semi + {toNullableVec({1, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 0}), toNullableVec({1, 1, 0, 0, 0}), toNullableVec({1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 0}), toNullableVec({1, 0, 1, 0, 0})}, + /// left outer anti semi x semi x left outer anti semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({1, 0, 0})}, + /// left outer anti semi x anti semi x left outer anti semi + {toNullableVec({}), toNullableVec({}), toNullableVec({}), toNullableVec({})}, + /// left outer anti semi x left outer semi x left outer anti semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({1, 0, 0}), toNullableVec({1, 1, 1})}, + /// left outer anti semi x left outer anti semi x left outer anti semi + {toNullableVec({1, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 2, 0}), toNullableVec({1, 0, 0}), toNullableVec({0, 0, 0})}, + }; - request = context - .scan("simple_test", "t2") - .join(context.scan("simple_test", "t1"), {col("b")}, ASTTableJoin::Kind::Right) - .build(context); + /// select * from (t1 JT1 t2 using (a)) JT2 (t3 JT1 t4 using (a)) using (b) + for (auto [i, jt1] : ext::enumerate(join_types)) { - executeWithConcurrency(request, {toNullableVec({{}, "1", "3", {}, "1", {}, {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}}), toNullableVec({"1", "1", "2", {}, {}, "1", {}}), toNullableVec({"3", "3", "4", "3", "3", {}, {}})}); + for (auto [j, jt2] : ext::enumerate(join_types)) + { + auto t1 = context.scan("multi_test", "t1"); + auto t2 = context.scan("multi_test", "t2"); + auto t3 = context.scan("multi_test", "t3"); + auto t4 = context.scan("multi_test", "t4"); + auto request = t1.join(t2, {col("a")}, jt1) + .join(t3.join(t4, {col("a")}, jt1), + {col("b")}, + jt2) + .build(context); + + executeWithConcurrency(request, expected_cols[i * join_type_num + j]); + } } } CATCH -TEST_F(JoinExecutorTestRunner, 
MultiInnerLeftJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Inner) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Inner), - {col("b")}, - ASTTableJoin::Kind::Left) - .build(context); - - executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); -} -CATCH - -TEST_F(JoinExecutorTestRunner, MultiInnerRightJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Inner) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Inner), - {col("b")}, - ASTTableJoin::Kind::Right) - .build(context); - - executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); -} -CATCH - -TEST_F(JoinExecutorTestRunner, MultiLeftInnerJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Left) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Left), - {col("b")}, - ASTTableJoin::Kind::Inner) - .build(context); - - executeWithConcurrency(request, {toNullableVec({1, 1, 3, 3, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 3, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, 3, 3, 3, 3, 0}), toNullableVec({{}, {}, 4, 4, 2, 2, 0}), toNullableVec({{}, {}, 5, 5, 3, 3, 0}), toNullableVec({1, 2, 1, 2, 1, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0}), toNullableVec({{}, 2, {}, 2, {}, 2, 0})}); -} -CATCH - -TEST_F(JoinExecutorTestRunner, MultiLeftRightJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Left) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Left), - {col("b")}, - ASTTableJoin::Kind::Right) - .build(context); - - executeWithConcurrency(request, {toNullableVec({1, 3, 3, 1, 3, 3, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({3, 2, 2, 3, 2, 2, 0}), toNullableVec({{}, 3, 3, {}, 3, 3, 0}), toNullableVec({{}, 4, 2, {}, 4, 2, 0}), toNullableVec({{}, 5, 3, {}, 5, 3, 0}), toNullableVec({1, 1, 1, 2, 2, 2, 0}), toNullableVec({2, 2, 2, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0}), toNullableVec({{}, {}, {}, 2, 2, 2, 0})}); -} -CATCH - -TEST_F(JoinExecutorTestRunner, MultiRightInnerJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Right) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Right), - {col("b")}, - ASTTableJoin::Kind::Inner) - .build(context); - - executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); -} -CATCH - -TEST_F(JoinExecutorTestRunner, MultiRightLeftJoin) -try -{ - auto [t1, t2, t3, t4] = multiTestScan(); - auto request = t1.join(t2, {col("a")}, ASTTableJoin::Kind::Right) - .join(t3.join(t4, {col("a")}, ASTTableJoin::Kind::Right), - {col("b")}, - ASTTableJoin::Kind::Left) - .build(context); - - 
executeWithConcurrency(request, {toNullableVec({3, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({3, 3, 0}), toNullableVec({4, 2, 0}), toNullableVec({5, 3, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0}), toNullableVec({2, 2, 0})}); -} -CATCH - TEST_F(JoinExecutorTestRunner, JoinCast) try { auto cast_request = [&]() { return context.scan("cast", "t1") - .join(context.scan("cast", "t2"), {col("a")}, ASTTableJoin::Kind::Inner) + .join(context.scan("cast", "t2"), {col("a")}, tipb::JoinType::TypeInnerJoin) .build(context); }; @@ -360,31 +375,28 @@ CATCH TEST_F(JoinExecutorTestRunner, JoinAgg) try { - auto request = context.scan("join_agg", "t1") - .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Inner) - .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) - .build(context); - - { - executeWithConcurrency(request, {toNullableVec({4}), toNullableVec({1}), toVec({3}), toNullableVec({1})}); - } - - request = context.scan("join_agg", "t1") - .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Left) - .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) - .build(context); + context.addMockTable("join_agg", "t1", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 1, 3, 4}), toVec("b", {1, 1, 4, 1})}); + + context.addMockTable("join_agg", "t2", {{"a", TiDB::TP::TypeLong}, {"b", TiDB::TP::TypeLong}}, {toVec("a", {1, 4, 2}), toVec("b", {2, 6, 2})}); + + const ColumnsWithTypeAndName expected_cols[join_type_num] = { + {toNullableVec({4}), toNullableVec({1}), toVec({3}), toNullableVec({1})}, + {toNullableVec({4, 3}), toNullableVec({1, 3}), toVec({3, 1}), toNullableVec({1, 4})}, + {toNullableVec({4, {}}), toNullableVec({1, {}}), toVec({3, 0}), toNullableVec({1, {}})}, + {toNullableVec({4}), toNullableVec({1}), toVec({3}), toNullableVec({1})}, + {toNullableVec({3}), toNullableVec({3}), toVec({1}), toNullableVec({4})}, + {toNullableVec({4, 3}), toNullableVec({1, 3}), toVec({3, 1}), toNullableVec({1, 4})}, + {toNullableVec({4, 3}), toNullableVec({1, 3}), toVec({3, 1}), toNullableVec({1, 4})}, + }; + for (auto [i, tp] : ext::enumerate(join_types)) { - executeWithConcurrency(request, {toNullableVec({4, 3}), toNullableVec({1, 3}), toVec({3, 1}), toNullableVec({1, 4})}); - } + auto request = context.scan("join_agg", "t1") + .join(context.scan("join_agg", "t2"), {col("a")}, tp) + .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) + .build(context); - request = context.scan("join_agg", "t1") - .join(context.scan("join_agg", "t2"), {col("a")}, ASTTableJoin::Kind::Right) - .aggregation({Max(col("a")), Min(col("a")), Count(col("a"))}, {col("b")}) - .build(context); - - { - executeWithConcurrency(request, {toNullableVec({4, {}}), toNullableVec({1, {}}), toVec({3, 0}), toNullableVec({1, {}})}); + executeWithConcurrency(request, expected_cols[i]); } } CATCH @@ -394,7 +406,7 @@ try { auto request = context .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .join(context.scan("test_db", "r_table"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .build(context); { executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); @@ -402,7 +414,7 @@ try request = context .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, 
ASTTableJoin::Kind::Left) + .join(context.scan("test_db", "r_table"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .project({"s", "join_c"}) .build(context); { @@ -411,7 +423,7 @@ try request = context .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table_2"), {col("join_c")}, ASTTableJoin::Kind::Left) + .join(context.scan("test_db", "r_table_2"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .build(context); { executeWithConcurrency(request, {toNullableVec({"banana", "banana", "banana", "banana"}), toNullableVec({"apple", "apple", "apple", "banana"}), toNullableVec({"banana", "banana", "banana", {}}), toNullableVec({"apple", "apple", "apple", {}})}); @@ -424,7 +436,7 @@ try { auto request = context .receive("exchange_l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .join(context.receive("exchange_r_table"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .build(context); { executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); @@ -437,7 +449,7 @@ try { auto request = context .scan("test_db", "l_table") - .join(context.receive("exchange_r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .join(context.receive("exchange_r_table"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .build(context); { executeWithConcurrency(request, {toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"}), toNullableVec({"banana", "banana"}), toNullableVec({"apple", "banana"})}); diff --git a/dbms/src/Flash/tests/gtest_split_tasks.cpp b/dbms/src/Flash/tests/gtest_split_tasks.cpp index c637073f32f..60708fd9906 100644 --- a/dbms/src/Flash/tests/gtest_split_tasks.cpp +++ b/dbms/src/Flash/tests/gtest_split_tasks.cpp @@ -86,7 +86,7 @@ try { auto tasks = context .scan("test_db", "l_table") - .join(context.scan("test_db", "r_table"), {col("join_c")}, ASTTableJoin::Kind::Left) + .join(context.scan("test_db", "r_table"), {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) .topN("join_c", false, 2) .buildMPPTasks(context); @@ -135,4 +135,4 @@ CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index 863bbf925b3..de65ab692c8 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -246,23 +246,23 @@ DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchang DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs) { - return join(right, exprs, ASTTableJoin::Kind::Inner); + return join(right, exprs, tipb::TypeInnerJoin); } -DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind) +DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, MockAstVec exprs, tipb::JoinType tp) { assert(root); assert(right.root); - auto join_ast = std::make_shared(); - auto exp_list = std::make_shared(); + + // todo(ljr): support `on` expression + auto using_expr_list = std::make_shared(); for (const auto & expr : exprs) { - exp_list->children.push_back(expr); + using_expr_list->children.push_back(expr); } - join_ast->using_expression_list = exp_list; - join_ast->strictness = ASTTableJoin::Strictness::All; - join_ast->kind = kind; - root = compileJoin(getExecutorIndex(), root, right.root, join_ast); + + root = 
compileJoin(getExecutorIndex(), root, right.root, tp, using_expr_list); + return *this; } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 4eec611e06a..2fce59c4e97 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -95,7 +95,7 @@ class DAGRequestBuilder // Currently only support inner join, left join and right join. // TODO support more types of join. DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs); - DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs, ASTTableJoin::Kind kind); + DAGRequestBuilder & join(const DAGRequestBuilder & right, MockAstVec exprs, tipb::JoinType tp); // aggregation DAGRequestBuilder & aggregation(ASTPtr agg_func, ASTPtr group_by_expr); diff --git a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp index 72f0bb505d1..1956758e373 100644 --- a/dbms/src/TestUtils/tests/gtest_mock_executors.cpp +++ b/dbms/src/TestUtils/tests/gtest_mock_executors.cpp @@ -180,7 +180,7 @@ try DAGRequestBuilder left_builder = context.scan("test_db", "l_table") .topN({{"l_a", false}}, 10) - .join(right_builder, {col("join_c")}, ASTTableJoin::Kind::Left) // todo ensure the join is legal. + .join(right_builder, {col("join_c")}, tipb::JoinType::TypeLeftOuterJoin) // todo ensure the join is legal. .limit(10); auto request = left_builder.build(context); { @@ -265,4 +265,4 @@ try } CATCH } // namespace tests -} // namespace DB \ No newline at end of file +} // namespace DB From f6f7b0faf60df37d309f12baf90d81f6cc76eb7a Mon Sep 17 00:00:00 2001 From: Calvin Neo Date: Fri, 29 Jul 2022 15:19:12 +0800 Subject: [PATCH 11/17] Fix RegionEpoch Error and CheckWaitIndex takes too long (#5498) close pingcap/tiflash#5458 --- contrib/tiflash-proxy | 2 +- dbms/src/Storages/Transaction/KVStore.cpp | 18 ++++++++++++------ dbms/src/Storages/Transaction/KVStore.h | 4 ++-- dbms/src/Storages/Transaction/ProxyFFI.cpp | 4 ++-- dbms/src/Storages/Transaction/ProxyFFI.h | 2 +- .../Transaction/tests/gtest_kvstore.cpp | 4 ++-- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index cdd5996980e..f30ca6b1079 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit cdd5996980ecbe5e8d9fe597ec620a5fe394d586 +Subproject commit f30ca6b1079a7945165cc1469940a6c8beb24218 diff --git a/dbms/src/Storages/Transaction/KVStore.cpp b/dbms/src/Storages/Transaction/KVStore.cpp index 690c7a61ab2..a4d9fab1fd0 100644 --- a/dbms/src/Storages/Transaction/KVStore.cpp +++ b/dbms/src/Storages/Transaction/KVStore.cpp @@ -331,17 +331,17 @@ bool KVStore::needFlushRegionData(UInt64 region_id, TMTContext & tmt) { auto region_task_lock = region_manager.genRegionTaskLock(region_id); const RegionPtr curr_region_ptr = getRegion(region_id); - return canFlushRegionDataImpl(curr_region_ptr, false, false, tmt, region_task_lock); + return canFlushRegionDataImpl(curr_region_ptr, false, false, tmt, region_task_lock, 0, 0); } -bool KVStore::tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt) +bool KVStore::tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt, UInt64 index, UInt64 term) { auto region_task_lock = region_manager.genRegionTaskLock(region_id); const RegionPtr curr_region_ptr = getRegion(region_id); - return canFlushRegionDataImpl(curr_region_ptr, true, try_until_succeed, tmt, region_task_lock); + return 
canFlushRegionDataImpl(curr_region_ptr, true, try_until_succeed, tmt, region_task_lock, index, term); } -bool KVStore::canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock) +bool KVStore::canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock, UInt64 index, UInt64 term) { if (curr_region_ptr == nullptr) { @@ -369,7 +369,12 @@ bool KVStore::canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 fl } if (can_flush && flush_if_possible) { - LOG_FMT_DEBUG(log, "{} flush region due to canFlushRegionData", curr_region.toString(false)); + LOG_FMT_DEBUG(log, "{} flush region due to canFlushRegionData, index {} term {}", curr_region.toString(false), index, term); + if (index) + { + // We set actual index when handling CompactLog. + curr_region.handleWriteRaftCmd({}, index, term, tmt); + } if (tryFlushRegionCacheInStorage(tmt, curr_region, log, try_until_succeed)) { persistRegion(curr_region, region_task_lock, "canFlushRegionData before compact raft log"); @@ -408,7 +413,6 @@ EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd( term, index); - curr_region.handleWriteRaftCmd({}, index, term, tmt); if (cmd_type == raft_cmdpb::AdminCmdType::CompactLog) { @@ -418,6 +422,8 @@ EngineStoreApplyRes KVStore::handleUselessAdminRaftCmd( // ref. https://github.com/pingcap/tidb-engine-ext/blob/e83a37d2d8d8ae1778fe279c5f06a851f8c9e56a/components/raftstore/src/engine_store_ffi/observer.rs#L175 return EngineStoreApplyRes::Persist; } + + curr_region.handleWriteRaftCmd({}, index, term, tmt); return EngineStoreApplyRes::None; } diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h index b58083557a1..4cce3b80f5b 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -109,7 +109,7 @@ class KVStore final : private boost::noncopyable EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 region_id, UInt64 index, UInt64 term, TMTContext & tmt); bool needFlushRegionData(UInt64 region_id, TMTContext & tmt); - bool tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt); + bool tryFlushRegionData(UInt64 region_id, bool try_until_succeed, TMTContext & tmt, UInt64 index, UInt64 term); void handleApplySnapshot(metapb::Region && region, uint64_t peer_id, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt); @@ -225,7 +225,7 @@ class KVStore final : private boost::noncopyable /// Notice that if flush_if_possible is set to false, we only check if a flush is allowed by rowsize/size/interval. /// It will not check if a flush will eventually succeed. /// In other words, `canFlushRegionDataImpl(flush_if_possible=true)` can return false. 
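// [Editor's note, not part of the patch] A minimal sketch of how the two
// public entry points map onto canFlushRegionDataImpl after this change,
// based on the call sites in KVStore.cpp above; passing `0, 0` means there is
// no CompactLog index/term to advance:
//
//   // Check-only path: never flushes, so index/term are irrelevant.
//   needFlushRegionData(region_id, tmt)
//       -> canFlushRegionDataImpl(region, /*flush_if_possible=*/false,
//                                 /*try_until_succeed=*/false, tmt, lock, 0, 0);
//
//   // Flush path: may first advance the region's applied (index, term)
//   // taken from the CompactLog, then persist.
//   tryFlushRegionData(region_id, until_succeed, tmt, index, term)
//       -> canFlushRegionDataImpl(region, /*flush_if_possible=*/true,
//                                 until_succeed, tmt, lock, index, term);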
- bool canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock); + bool canFlushRegionDataImpl(const RegionPtr & curr_region_ptr, UInt8 flush_if_possible, bool try_until_succeed, TMTContext & tmt, const RegionTaskLock & region_task_lock, UInt64 index, UInt64 term); void persistRegion(const Region & region, const RegionTaskLock & region_task_lock, const char * caller); void releaseReadIndexWorkers(); diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index d4ba50d5714..59d014f8dc9 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -142,12 +142,12 @@ uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id) } } -uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed) +uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed, uint64_t index, uint64_t term) { try { auto & kvstore = server->tmt->getKVStore(); - return kvstore->tryFlushRegionData(region_id, until_succeed, *server->tmt); + return kvstore->tryFlushRegionData(region_id, until_succeed, *server->tmt, index, term); } catch (...) { diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h index aafe4b375eb..a4607396942 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.h +++ b/dbms/src/Storages/Transaction/ProxyFFI.h @@ -126,7 +126,7 @@ EngineStoreApplyRes HandleWriteRaftCmd(const EngineStoreServerWrap * server, WriteCmdsView cmds, RaftCmdHeader header); uint8_t NeedFlushData(EngineStoreServerWrap * server, uint64_t region_id); -uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed); +uint8_t TryFlushData(EngineStoreServerWrap * server, uint64_t region_id, uint8_t until_succeed, uint64_t index, uint64_t term); void AtomicUpdateProxy(EngineStoreServerWrap * server, RaftStoreProxyFFIHelper * proxy); void HandleDestroy(EngineStoreServerWrap * server, uint64_t region_id); EngineStoreApplyRes HandleIngestSST(EngineStoreServerWrap * server, SSTViewVec snaps, RaftCmdHeader header); diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp index e157d711f1d..180be7e65b7 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp @@ -111,7 +111,7 @@ void RegionKVStoreTest::testNewProxy() ASSERT_EQ(kvs.handleAdminRaftCmd(std::move(request), std::move(response), 1, 5, 1, ctx.getTMTContext()), EngineStoreApplyRes::Persist); // Filter - ASSERT_EQ(kvs.tryFlushRegionData(1, false, ctx.getTMTContext()), false); + ASSERT_EQ(kvs.tryFlushRegionData(1, false, ctx.getTMTContext(), 0, 0), false); } } @@ -1241,7 +1241,7 @@ void RegionKVStoreTest::testKVStore() // There shall be data to flush. ASSERT_EQ(kvs.needFlushRegionData(19, ctx.getTMTContext()), true); // Force flush until succeed only for testing. 
- ASSERT_EQ(kvs.tryFlushRegionData(19, true, ctx.getTMTContext()), true); + ASSERT_EQ(kvs.tryFlushRegionData(19, true, ctx.getTMTContext(), 0, 0), true); } } From b4ee52efd59357a1c6a2d3db50519c0a40c9526d Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 29 Jul 2022 20:13:12 +0800 Subject: [PATCH 12/17] test: Simplify comparing the stream read result (#5435) close pingcap/tiflash#5452 --- dbms/src/Core/Block.cpp | 18 +- dbms/src/Core/Block.h | 2 + dbms/src/Core/ColumnWithTypeAndName.cpp | 21 +- dbms/src/Core/ColumnWithTypeAndName.h | 3 + .../DeltaMerge/DMSegmentThreadInputStream.h | 8 +- .../Storages/DeltaMerge/DeltaMergeHelpers.h | 18 +- .../src/Storages/DeltaMerge/tests/DMTestEnv.h | 49 +- .../DeltaMerge/tests/gtest_column_filter.cpp | 89 +- .../DeltaMerge/tests/gtest_data_streams.cpp | 3 +- .../tests/gtest_dm_delta_index_manager.cpp | 6 +- .../tests/gtest_dm_delta_merge_store.cpp | 1315 +++++------------ ...est_dm_delta_merge_store_for_fast_mode.cpp | 848 +++-------- .../DeltaMerge/tests/gtest_dm_file.cpp | 713 +++------ .../tests/gtest_dm_minmax_index.cpp | 5 +- .../DeltaMerge/tests/gtest_dm_segment.cpp | 649 ++------ .../tests/gtest_dm_segment_common_handle.cpp | 470 +++--- .../tests/gtest_dm_storage_delta_merge.cpp | 91 +- .../tests/gtest_segment_test_basic.cpp | 31 +- .../DeltaMerge/tests/gtest_version_filter.cpp | 72 +- dbms/src/TestUtils/FunctionTestUtils.cpp | 80 +- dbms/src/TestUtils/FunctionTestUtils.h | 21 +- dbms/src/TestUtils/InputStreamTestUtils.cpp | 350 +++++ dbms/src/TestUtils/InputStreamTestUtils.h | 75 + .../tests/gtest_function_test_utils.cpp | 13 +- .../tests/gtest_inputstream_test_utils.cpp | 302 ++++ .../TestUtils/tests/gtest_print_columns.cpp | 7 +- 26 files changed, 2092 insertions(+), 3167 deletions(-) create mode 100644 dbms/src/TestUtils/InputStreamTestUtils.cpp create mode 100644 dbms/src/TestUtils/InputStreamTestUtils.h create mode 100644 dbms/src/TestUtils/tests/gtest_inputstream_test_utils.cpp diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 971e8f36e2a..bd67e59df77 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -300,7 +300,7 @@ std::string Block::dumpNames() const out << ", "; out << it->name; } - return out.str(); + return out.releaseStr(); } @@ -313,7 +313,21 @@ std::string Block::dumpStructure() const out << ", "; it->dumpStructure(out); } - return out.str(); + return out.releaseStr(); +} + +std::string Block::dumpJsonStructure() const +{ + WriteBufferFromOwnString out; + out << "["; + for (auto it = data.begin(); it != data.end(); ++it) + { + if (it != data.begin()) + out << ","; + it->dumpJsonStructure(out); + } + out << "]"; + return out.releaseStr(); } diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 713ae85d082..04378c7553f 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -118,6 +118,8 @@ class Block /** List of names, types and lengths of columns. Designed for debugging. */ std::string dumpStructure() const; + std::string dumpJsonStructure() const; + /** Get the same block, but empty. */ Block cloneEmpty() const; diff --git a/dbms/src/Core/ColumnWithTypeAndName.cpp b/dbms/src/Core/ColumnWithTypeAndName.cpp index 644b8cff0bf..56f8e1a36e8 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.cpp +++ b/dbms/src/Core/ColumnWithTypeAndName.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include @@ -48,15 +49,29 @@ void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const out << ' ' << column->dumpStructure(); else out << " nullptr"; - - out << " " << column_id; } String ColumnWithTypeAndName::dumpStructure() const { WriteBufferFromOwnString out; dumpStructure(out); - return out.str(); + return out.releaseStr(); +} + +void ColumnWithTypeAndName::dumpJsonStructure(WriteBuffer & out) const +{ + out << fmt::format(R"json({{"name":"{}","id":{},"type":{},"column":{}}})json", + name, + column_id, + (type ? "\"" + type->getName() + "\"" : "null"), + (column ? "\"" + column->dumpStructure() + "\"" : "null")); +} + +String ColumnWithTypeAndName::dumpJsonStructure() const +{ + WriteBufferFromOwnString out; + dumpJsonStructure(out); + return out.releaseStr(); } } // namespace DB diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index 8fff3b1453a..ea889cdb2dd 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -63,6 +63,9 @@ struct ColumnWithTypeAndName void dumpStructure(WriteBuffer & out) const; String dumpStructure() const; + + void dumpJsonStructure(WriteBuffer & out) const; + String dumpJsonStructure() const; }; } // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h index 33433da10f8..4e1a7b6fd3d 100644 --- a/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h +++ b/dbms/src/Storages/DeltaMerge/DMSegmentThreadInputStream.h @@ -104,7 +104,13 @@ class DMSegmentThreadInputStream : public IProfilingBlockInputStream cur_segment = task->segment; if (is_raw) { - cur_stream = cur_segment->getInputStreamRaw(*dm_context, columns_to_read, task->read_snapshot, task->ranges, filter, do_delete_mark_filter_for_raw); + cur_stream = cur_segment->getInputStreamRaw( + *dm_context, + columns_to_read, + task->read_snapshot, + task->ranges, + filter, + do_delete_mark_filter_for_raw); } else { diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index 77335e6d9f0..ad585d684c7 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -46,7 +46,7 @@ inline Handle encodeToPK(T v) inline size_t getPosByColumnId(const Block & block, ColId col_id) { size_t pos = 0; - for (auto & c : block) + for (const auto & c : block) { if (c.column_id == col_id) return pos; @@ -57,7 +57,7 @@ inline size_t getPosByColumnId(const Block & block, ColId col_id) inline ColumnWithTypeAndName tryGetByColumnId(const Block & block, ColId col_id) { - for (auto & c : block) + for (const auto & c : block) { if (c.column_id == col_id) return c; @@ -68,7 +68,7 @@ inline ColumnWithTypeAndName tryGetByColumnId(const Block & block, ColId col_id) // TODO: we should later optimize getByColumnId. 
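// [Editor's note, not part of the patch] One way to act on the TODO above is
// to build a ColId -> position map once per block and reuse it, instead of
// scanning linearly on every lookup. A rough sketch under that assumption
// (`buildColIdPosIndex` is a hypothetical helper, and <unordered_map> would
// need to be included):
//
//   inline std::unordered_map<ColId, size_t> buildColIdPosIndex(const Block & block)
//   {
//       std::unordered_map<ColId, size_t> index;
//       for (size_t pos = 0; pos < block.columns(); ++pos)
//           index.emplace(block.getByPosition(pos).column_id, pos);
//       return index;
//   }
//
//   // Usage: build once, then resolve many column ids in O(1) each.
//   const auto pos_index = buildColIdPosIndex(block);
//   if (auto it = pos_index.find(col_id); it != pos_index.end())
//       return block.getByPosition(it->second);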
inline const ColumnWithTypeAndName & getByColumnId(const Block & block, ColId col_id) { - for (auto & c : block) + for (const auto & c : block) { if (c.column_id == col_id) return c; @@ -105,7 +105,7 @@ inline PaddedPODArray const * toColumnVectorDataPtr(const ColumnPtr & column) { if (column->isColumnConst()) { - auto * const_col = static_cast(column.get()); + const auto * const_col = static_cast(column.get()); const ColumnVector & c = assert_cast &>(const_col->getDataColumn()); return &c.getData(); @@ -191,7 +191,7 @@ inline Block genBlock(const ColumnDefines & column_defines, const Columns & colu Block block; for (size_t i = 0; i < column_defines.size(); ++i) { - auto & c = column_defines[i]; + const auto & c = column_defines[i]; addColumnToBlock(block, c.id, c.name, c.type, columns[i], c.default_value); } return block; @@ -200,7 +200,7 @@ inline Block genBlock(const ColumnDefines & column_defines, const Columns & colu inline Block getNewBlockByHeader(const Block & header, const Block & block) { Block new_block; - for (auto & c : header) + for (const auto & c : header) new_block.insert(block.getByName(c.name)); return new_block; } @@ -215,7 +215,7 @@ inline ColumnDefines getColumnDefinesFromBlock(const Block & block) inline bool hasColumn(const ColumnDefines & columns, const ColId & col_id) { - for (auto & c : columns) + for (const auto & c : columns) { if (c.id == col_id) return true; @@ -231,8 +231,8 @@ inline bool isSameSchema(const Block & a, const Block & b) return false; for (size_t i = 0; i < a.columns(); ++i) { - auto & ca = a.getByPosition(i); - auto & cb = b.getByPosition(i); + const auto & ca = a.getByPosition(i); + const auto & cb = b.getByPosition(i); bool col_ok = ca.column_id == cb.column_id; bool name_ok = ca.name == cb.name; diff --git a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h index 84fafbc46ef..6c6343cb96a 100644 --- a/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h +++ b/dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h @@ -41,6 +41,8 @@ namespace tests // Add this so that we can call typeFromString under namespace DB::DM::tests using DB::tests::typeFromString; +using namespace DB::tests; + /// helper functions for comparing HandleRange inline ::testing::AssertionResult HandleRangeCompare( const char * lhs_expr, @@ -96,6 +98,18 @@ inline std::vector createSignedNumbers(size_t beg, size_t end) return values; } +// Mock a common_pk_col that composed by number `rowkey_column_size` of int64 value +inline String genMockCommonHandle(Int64 value, size_t rowkey_column_size) +{ + WriteBufferFromOwnString ss; + for (size_t index = 0; index < rowkey_column_size; ++index) + { + ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); + ::DB::EncodeInt64(value, ss); + } + return ss.releaseStr(); +} + class DMTestEnv { public: @@ -285,13 +299,7 @@ class DMTestEnv for (size_t i = 0; i < num_rows; i++) { Int64 value = reversed ? 
end - 1 - i : beg + i; - WriteBufferFromOwnString ss; - for (size_t index = 0; index < rowkey_column_size; index++) - { - ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - ::DB::EncodeInt64(value, ss); - } - values.emplace_back(ss.releaseStr()); + values.emplace_back(genMockCommonHandle(value, rowkey_column_size)); } block.insert(DB::tests::createColumn( std::move(values), @@ -410,16 +418,7 @@ class DMTestEnv const size_t num_rows = 1; if (is_common_handle) { - Strings values; - { - WriteBufferFromOwnString ss; - for (size_t index = 0; index < rowkey_column_size; index++) - { - ::DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - ::DB::EncodeInt64(pk, ss); - } - values.emplace_back(ss.releaseStr()); - } + Strings values{genMockCommonHandle(pk, rowkey_column_size)}; block.insert(DB::tests::createColumn( std::move(values), pk_name, @@ -466,20 +465,8 @@ class DMTestEnv static RowKeyRange getRowKeyRangeForClusteredIndex(Int64 start, Int64 end, size_t rowkey_column_size) { - WriteBufferFromOwnString ss; - for (size_t i = 0; i < rowkey_column_size; i++) - { - EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - EncodeInt64(start, ss); - } - RowKeyValue start_key = RowKeyValue(true, std::make_shared(ss.releaseStr())); - ss.restart(); - for (size_t i = 0; i < rowkey_column_size; i++) - { - EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - EncodeInt64(end, ss); - } - RowKeyValue end_key = RowKeyValue(true, std::make_shared(ss.releaseStr())); + RowKeyValue start_key = RowKeyValue(true, std::make_shared(genMockCommonHandle(start, rowkey_column_size))); + RowKeyValue end_key = RowKeyValue(true, std::make_shared(genMockCommonHandle(end, rowkey_column_size))); return RowKeyRange(start_key, end_key, true, rowkey_column_size); } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_column_filter.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_column_filter.cpp index f6ca7900a13..4ed58b03e73 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_column_filter.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_column_filter.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include namespace DB { @@ -45,7 +47,7 @@ class DebugBlockInputStream : public BlocksListBlockInputStream bool is_common_handle; }; -BlockInputStreamPtr genColumnFilterInputStream(BlocksList & blocks, const ColumnDefines & columns, bool is_common_handle) +BlockInputStreamPtr genColumnProjInputStream(BlocksList & blocks, const ColumnDefines & columns, bool is_common_handle) { ColumnDefine handle_define( TiDBPkColumnID, @@ -85,34 +87,16 @@ TEST(DeleteFilterTest, NormalCase) ColumnDefines columns = getColumnDefinesFromBlock(blocks.back()); - { - auto in = genDeleteFilterInputStream(blocks, columns, false); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 1); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "hello"); - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, "world"); - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, "TiFlash"); - - block = in->read(); - ASSERT_FALSE(block); // ensure the stream is ended - in->readSuffix(); - } + auto in = genDeleteFilterInputStream(blocks, columns, false); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({str_col_name}), + createColumns({ + createColumn({"hello", "world", "TiFlash"}), + })); } -TEST(ColumnFilterTest, 
NormalCase) +TEST(ColumnProjectionTest, NormalCase) { BlocksList blocks; @@ -125,47 +109,22 @@ TEST(ColumnFilterTest, NormalCase) blocks.push_back(DMTestEnv::prepareOneRowBlock(pk_value, 40, 1, str_col_name, "Storage", false, 1)); } + // Only keep the column `str_col_name` ColumnDefines columns = getColumnDefinesFromBlock(blocks.back()); - + for (auto iter = columns.begin(); iter != columns.end(); /**/) { - auto in = genColumnFilterInputStream(blocks, columns, false); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 1); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "hello"); - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, "world"); - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, ""); - - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, "TiFlash"); - - block = in->read(); - ASSERT_EQ(block.rows(), 1); - col = block.getByName(str_col_name); - val = col.column->getDataAt(0); - ASSERT_EQ(val, "Storage"); - - block = in->read(); - ASSERT_FALSE(block); // ensure the stream is ended - in->readSuffix(); + if (iter->name != str_col_name) + iter = columns.erase(iter); + else + iter++; } + + ASSERT_INPUTSTREAM_BLOCK_UR( + genColumnProjInputStream(blocks, columns, false), + Block({ + createColumn({"hello", "world", "", "TiFlash", "Storage"}, str_col_name), + })); } } // namespace tests } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp index 00f31bc97e7..9b30bbded9d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_data_streams.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB { @@ -23,7 +24,7 @@ namespace DM { namespace tests { -TEST(PKSquash_test, WithExtraSort) +TEST(PKSquashTest, WithExtraSort) { BlocksList blocks; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp index 7b90d23036f..2ccecbe591c 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_index_manager.cpp @@ -22,10 +22,10 @@ namespace DM { namespace tests { -class DeltaIndexManager_test : public ::testing::Test +class DeltaIndexManagerTest : public ::testing::Test { public: - DeltaIndexManager_test() + DeltaIndexManagerTest() : one_node_size(DefaultDeltaTree().getBytes()) {} @@ -41,7 +41,7 @@ DeltaIndexPtr genDeltaIndex() } -TEST_F(DeltaIndexManager_test, LRU) +TEST_F(DeltaIndexManagerTest, LRU) try { DeltaIndexManager manager(one_node_size * 100); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 19b9e73f4e7..1e48dd4e4af 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -14,12 +14,21 @@ #include #include +#include #include +#include +#include #include #include +#include #include +#include +#include +#include +#include #include +#include namespace 
DB { @@ -39,7 +48,6 @@ namespace DM { namespace tests { - String testModeToString(const ::testing::TestParamInfo & info) { const auto mode = info.param; @@ -120,6 +128,7 @@ try DMTestEnv::PkType::PkIsHandleInt32, }) { + SCOPED_TRACE(fmt::format("Test case for {}", DMTestEnv::PkTypeToString(pk_type))); LOG_FMT_INFO(log, "Test case for {} begin.", DMTestEnv::PkTypeToString(pk_type)); auto cols = DMTestEnv::getDefaultColumns(pk_type); @@ -144,8 +153,7 @@ try block1 = DeltaMergeStore::addExtraColumnIfNeed(*db_context, store->getHandle(), std::move(block1)); ASSERT_EQ(block1.rows(), nrows); ASSERT_TRUE(block1.has(EXTRA_HANDLE_COLUMN_NAME)); - for (const auto & c : block1) - ASSERT_EQ(c.column->size(), nrows); + ASSERT_NO_THROW({ block1.checkNumberOfRows(); }); // Make a block that is overlapped with `block1` and it should be squashed by `PKSquashingBlockInputStream` size_t nrows_2 = 2; @@ -161,27 +169,12 @@ try block2 = DeltaMergeStore::addExtraColumnIfNeed(*db_context, store->getHandle(), std::move(block2)); ASSERT_EQ(block2.rows(), nrows_2); ASSERT_TRUE(block2.has(EXTRA_HANDLE_COLUMN_NAME)); - for (const auto & c : block2) - ASSERT_EQ(c.column->size(), nrows_2); + ASSERT_NO_THROW({ block2.checkNumberOfRows(); }); BlockInputStreamPtr stream = std::make_shared(BlocksList{block1, block2}); stream = std::make_shared>(stream, EXTRA_HANDLE_COLUMN_ID, store->isCommonHandle()); - - size_t num_rows_read = 0; - stream->readPrefix(); - while (Block block = stream->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - ASSERT_EQ(c->size(), block.rows()) - << "unexpected num of rows for column [name=" << iter.name << "] " << DMTestEnv::PkTypeToString(pk_type); - } - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, nrows + nrows_2); + ASSERT_INPUTSTREAM_NROWS(stream, nrows + nrows_2); LOG_FMT_INFO(log, "Test case for {} done.", DMTestEnv::PkTypeToString(pk_type)); } @@ -268,73 +261,28 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - //printf("pk:%lld\n", c->getInt(i)); - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_str_define.name) - { - //printf("%s:%s\n", col_str_define.name.c_str(), c->getDataAt(i).data); - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - //printf("%s:%lld\n", col_i8_define.name.c_str(), c->getInt(i)); - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_str_define.name, col_i8_define.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } { // test readRaw const auto & columns = store->getTableColumns(); BlockInputStreamPtr in = store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1, /* keep_order= */ false)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_str_define.name) - { - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - Int64 num = i * (i % 2 == 0 ? -1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_str_define.name, col_i8_define.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } } CATCH @@ -412,15 +360,7 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 0); + ASSERT_INPUTSTREAM_NROWS(in, 0); } } CATCH @@ -499,38 +439,14 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - //printf("pk:%lld\n", c->getInt(i)); - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_str_define.name) - { - //printf("%s:%s\n", col_str_define.name.c_str(), c->getDataAt(i).data); - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - //printf("%s:%lld\n", col_i8_define.name.c_str(), c->getInt(i)); - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_str_define.name, col_i8_define.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } } CATCH @@ -572,24 +488,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } // Delete range [0, 64) const size_t num_deleted_rows = 64; @@ -611,25 +515,13 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - // Range after deletion is [64, 128) - ASSERT_EQ(c->getInt(i), i + Int64(num_deleted_rows)); - } - } - } - } - - ASSERT_EQ(num_rows_read, num_rows_write - num_deleted_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + // Range after deletion is [64, 128) + createColumn(createNumbers(num_deleted_rows, num_rows_write)), + })); } } CATCH @@ -698,24 +590,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - - ASSERT_EQ(num_rows_read, 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, 3 * num_write_rows)), + })); } store = reload(); @@ -785,24 +665,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - - ASSERT_EQ(num_rows_read, 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, 3 * num_write_rows)), + })); } // Read with version { @@ -818,24 +686,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - - ASSERT_EQ(num_rows_read, 2 * 
num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, 2 * num_write_rows)), + })); } } CATCH @@ -875,23 +731,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto & iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); i++) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - } - } - } - ASSERT_EQ(num_rows_read, 8UL); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, 8)), + })); } { @@ -916,30 +761,12 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - // block_num represents index of current segment - int block_num = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto & iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); i++) - { - if (iter.name == DMTestEnv::pk_name && block_num == 0) - { - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == DMTestEnv::pk_name && block_num == 1) - { - EXPECT_EQ(c->getInt(i), i + 4); - } - } - } - block_num++; - } - ASSERT_EQ(num_rows_read, 9UL); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, 9)), + })); } } CATCH @@ -979,13 +806,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_tso1 + num_rows_tso2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_tso1 + num_rows_tso2); } { @@ -1004,13 +825,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_tso1 + num_rows_tso2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_tso1 + num_rows_tso2); } { @@ -1029,13 +844,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_tso1); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_tso1); } { @@ -1054,13 +863,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 0UL); + ASSERT_INPUTSTREAM_NROWS(in, 0); } } CATCH @@ -1117,27 +920,14 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << 
pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, VERSION_COLUMN_NAME}), + createColumns({ + createColumn(createNumbers(0, 32)), + createColumn(std::vector(32, tso1)), + })) + << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; } { @@ -1156,28 +946,14 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL) << "Data [32, 128) after ingest with tso less than: " << tso2 - << " are erased, should only get [0, 32)"; + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, VERSION_COLUMN_NAME}), + createColumns({ + createColumn(createNumbers(0, 32)), + createColumn(std::vector(32, tso1)), + })) + << fmt::format("Data [32, 128) after ingest with tso less than: {} are erased, should only get [0, 32)", tso2); } { @@ -1196,15 +972,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL + 16) << "The rows number after ingest with tso less than " << tso3 << " is not match"; + ASSERT_INPUTSTREAM_NROWS(in, 32 + 16) << fmt::format("The rows number after ingest with tso less than {} is not match", tso3); } { @@ -1223,13 +991,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32UL + (48 - 32) + (256UL - 80)) << "The rows number after ingest is not match"; + ASSERT_INPUTSTREAM_NROWS(in, 32 + (48 - 32) + (256 - 80)) << "The rows number after ingest is not match"; } { @@ -1250,13 +1012,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1UL); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 2UL) << "The rows number of two point get is not match"; + ASSERT_INPUTSTREAM_NROWS(in, 2) << "The rows number of two point get is not match"; } } CATCH @@ -1313,27 +1069,14 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - 
ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, VERSION_COLUMN_NAME}), + createColumns({ + createColumn(createNumbers(0, 32)), + createColumn(std::vector(32, tso1)), + })) + << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; } { @@ -1352,28 +1095,14 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "Data [32, 128) after ingest with tso less than: " << tso2 - << " are erased, should only get [0, 32)"; + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, VERSION_COLUMN_NAME}), + createColumns({ + createColumn(createNumbers(0, 32)), + createColumn(std::vector(32, tso1)), + })) + << fmt::format("Data [32, 128) after ingest with tso less than: {} are erased, should only get [0, 32)", tso2); } { @@ -1392,13 +1121,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32 + 128 - 32) << "The rows number after ingest is not match"; + ASSERT_INPUTSTREAM_NROWS(in, 32 + 128 - 32) << "The rows number after ingest is not match"; } } CATCH @@ -1450,27 +1173,14 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expect_pk = 0; - UInt64 expect_tso = tso1; - while (Block block = in->read()) - { - ASSERT_TRUE(block.has(DMTestEnv::pk_name)); - ASSERT_TRUE(block.has(VERSION_COLUMN_NAME)); - auto pk_c = block.getByName(DMTestEnv::pk_name); - auto v_c = block.getByName(VERSION_COLUMN_NAME); - for (size_t i = 0; i < block.rows(); ++i) - { - // std::cerr << "pk:" << pk_c.column->getInt(i) << ", ver:" << v_c.column->getInt(i) << std::endl; - ASSERT_EQ(pk_c.column->getInt(i), expect_pk++); - ASSERT_EQ(v_c.column->getUInt(i), expect_tso); - } - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, VERSION_COLUMN_NAME}), + createColumns({ + 
createColumn(createNumbers(0, 32)), + createColumn(std::vector(32, tso1)), + })) + << "Data [32, 128) before ingest should be erased, should only get [0, 32)"; } { @@ -1489,13 +1199,7 @@ try /* expected_block_size= */ 1024); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - num_rows_read += block.rows(); - in->readSuffix(); - EXPECT_EQ(num_rows_read, 32) << "The rows number after ingest is not match"; + ASSERT_INPUTSTREAM_NROWS(in, 32) << "The rows number after ingest is not match"; } } CATCH @@ -1579,33 +1283,12 @@ try BlockInputStreamPtr in = ins[0]; LOG_FMT_TRACE(&Poco::Logger::get(GET_GTEST_FULL_NAME), "start to check data of [1,{}]", num_rows_write_in_total); - - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expected_row_pk = 1; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - for (size_t i = 0; i < c->size(); ++i) - { - auto expected = expected_row_pk++; - auto value = c->getInt(i); - if (value != expected) - { - // Convenient for debug. - EXPECT_EQ(expected, value); - } - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write_in_total); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(1, num_rows_write_in_total + 1)), + })); LOG_FMT_TRACE(&Poco::Logger::get(GET_GTEST_FULL_NAME), "done checking data of [1,{}]", num_rows_write_in_total); } @@ -1695,33 +1378,13 @@ try ASSERT_EQ(col.column_id, col_id_ddl); ASSERT_TRUE(col.type->equals(*col_type_after_ddl)); } - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (size_t i = 0; i < block.rows(); ++i) - { - for (auto && iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - // printf("pk:%lld\n", c->getInt(i)); - EXPECT_EQ(c->getInt(i), Int64(i)); - } - else if (iter.name == col_name_ddl) - { - // printf("%s:%lld\n", col_name_ddl.c_str(), c->getInt(i)); - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_ddl}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } } CATCH @@ -1800,26 +1463,12 @@ try const Block head = in->getHeader(); ASSERT_FALSE(head.has(col_name_to_drop)); } - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } } CATCH @@ -1902,35 +1551,14 @@ try ASSERT_TRUE(col.type->equals(*col_type_to_add)); } } - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_name_c1) - { - Int64 num = i * (i % 2 == 0 ? -1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - else if (iter.name == col_name_to_add) - { - EXPECT_EQ(c->getInt(i), 0); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_c1, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 0)), + })); } } CATCH @@ -1990,25 +1618,13 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(tmp.get(), 1.123456); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 1.123456)), + })); } } CATCH @@ -2068,25 +1684,13 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(tmp.get(), 1.123456); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + 
ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 1.123456)), + })); } } CATCH @@ -2145,26 +1749,13 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(tmp.get(), 1.125); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 1.125)), + })); } } CATCH @@ -2223,26 +1814,13 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(tmp.get(), 1); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 1)), + })); } } CATCH @@ -2302,25 +1880,13 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(tmp.get(), 1); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 1)), + })); } } CATCH @@ -2378,22 +1944,17 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); i++) - { - EXPECT_EQ((*col.column)[i].get(), mydatetime_uint); // Timestamp for '1999-09-09 12:34:56' - } - } - 
in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + std::vector datetime_data( + num_rows_write, + MyDateTime(1999, 9, 9, 12, 34, 56, 0).toPackedUInt()); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(/*data_type_args=*/std::make_tuple(0), datetime_data), + })); } } CATCH @@ -2452,25 +2013,13 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - EXPECT_EQ(tmp.get(), String("test_add_string_col")); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(Strings(num_rows_write, "test_add_string_col")), + })); } } CATCH @@ -2557,32 +2106,13 @@ try ASSERT_THROW(head.getByName(col_name_before_ddl), ::DB::Exception); } - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - //printf("pk:%lld\n", c->getInt(i)); - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_name_after_ddl) - { - //printf("col2:%s\n", c->getDataAt(i).data); - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_after_ddl}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } } CATCH @@ -2692,27 +2222,12 @@ try // check old col name is not exist ASSERT_THROW(head.getByName(col_name_before_ddl), ::DB::Exception); } - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == col_name_after_ddl) - { - //printf("col2:%s\n", c->getDataAt(i).data); - EXPECT_EQ(c->getInt(i), Int64(i)); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({col_name_after_ddl}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } { @@ -2755,27 +2270,12 @@ try // check old col name is not exist ASSERT_THROW(head.getByName(col_name_before_ddl), ::DB::Exception); } - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == col_name_after_ddl) - { - //printf("col2:%s\n", c->getDataAt(i).data); - EXPECT_EQ(c->getInt(i), Int64(i)); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write * 2); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({col_name_after_ddl}), + createColumns({ + createColumn(createNumbers(0, num_rows_write * 2)), + })); } } } @@ -2844,26 +2344,13 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - for (size_t i = 0; i < block.rows(); ++i) - { - Field tmp; - col.column->get(i, tmp); - // There is some loss of precision during the convertion, so we just do a rough comparison - EXPECT_FLOAT_EQ(std::abs(tmp.get()), 1.125); - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(std::vector{0}), + createColumn(std::vector{1.125}), + })); } { @@ -2900,21 +2387,14 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write * 2); - ASSERT_TRUE(block.has(col_name_to_add)); - const auto & col = block.getByName(col_name_to_add); - ASSERT_DATATYPE_EQ(col.type, col_type_to_add); - ASSERT_EQ(col.name, col_name_to_add); - Field tmp; - tmp = (*col.column)[0]; - EXPECT_FLOAT_EQ(tmp.get(), 1.125); // fill with default value - tmp = (*col.column)[1]; - EXPECT_FLOAT_EQ(tmp.get(), 3.1415); // keep the value we inserted - } - in->readSuffix(); + // FIXME!!! 
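+            // Expect two rows: row 0 is filled with the column default value 1.125,
+            // while row 1 keeps the value 3.1415 that we inserted (matching the
+            // per-row checks this assertion replaces).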
+ ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_name_to_add}), + createColumns({ + createColumn(std::vector{0, 1}), + createColumn(std::vector{1.125, 3.1415}), + })); } } CATCH @@ -2946,7 +2426,7 @@ try { const ColumnDefine col_str_define(2, "col2", std::make_shared()); const ColumnDefine col_i8_define(3, "i8", std::make_shared()); - size_t rowkey_column_size = 2; + const size_t rowkey_column_size = 2; { auto table_column_defines = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::CommonHandle); table_column_defines->emplace_back(col_str_define); @@ -3018,71 +2498,42 @@ try /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - else if (iter.name == col_str_define.name) - { - //printf("%s:%s\n", col_str_define.name.c_str(), c->getDataAt(i).data); - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - //printf("%s:%lld\n", col_i8_define.name.c_str(), c->getInt(i)); - Int64 num = i * (i % 2 == 0 ? -1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, num_rows_write); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_i8_define.name, col_str_define.name}), + createColumns({ + createColumn(common_handle_coldata), + createColumn(createSignedNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + })); } { // test readRaw const auto & columns = store->getTableColumns(); BlockInputStreamPtr in = store->readRaw(*db_context, db_context->getSettingsRef(), columns, 1, /* keep_order= */ false)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - else if (iter.name == col_str_define.name) - { - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, num_rows_write); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_i8_define.name, col_str_define.name}), + createColumns({ + createColumn(common_handle_coldata), + createColumn(createSignedNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + })); } } CATCH @@ -3148,24 +2599,20 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - } - } - } - - ASSERT_EQ(num_rows_read, 3 * num_write_rows); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, 3 * num_write_rows); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } store = reload(table_column_defines, true, rowkey_column_size); @@ -3224,24 +2671,20 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - } - } - } - - ASSERT_EQ(num_rows_read, 3 * num_write_rows); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, 3 * num_write_rows); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } // Read with version { @@ -3257,24 +2700,20 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - } - } - } - - ASSERT_EQ(num_rows_read, 2 * num_write_rows); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, 2 * num_write_rows); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, 
rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), 2 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH @@ -3283,7 +2722,7 @@ TEST_P(DeltaMergeStoreRWTest, DeleteReadWithCommonHandle) try { const size_t num_rows_write = 128; - size_t rowkey_column_size = 2; + const size_t rowkey_column_size = 2; { // Create a block with sequential Int64 handle in range [0, 128) auto table_column_difines = DMTestEnv::getDefaultColumns(DMTestEnv::PkType::CommonHandle); @@ -3317,42 +2756,27 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - } - } - } - - ASSERT_EQ(num_rows_read, num_rows_write); + // mock common handle + auto common_handle_coldata = []() { + auto tmp = createNumbers(0, num_rows_write); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } // Delete range [0, 64) const size_t num_deleted_rows = 64; { - WriteBufferFromOwnString ss; - DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - DB::EncodeInt64(Int64(0), ss); - DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - DB::EncodeInt64(Int64(0), ss); - RowKeyValue start(true, std::make_shared(ss.releaseStr())); - - ss.restart(); - DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - DB::EncodeInt64(Int64(num_deleted_rows), ss); - DB::EncodeUInt(static_cast(TiDB::CodecFlagInt), ss); - DB::EncodeInt64(Int64(num_deleted_rows), ss); - RowKeyValue end(true, std::make_shared(ss.str())); - RowKeyRange range(start, end, true, 2); + RowKeyValue start(true, std::make_shared(genMockCommonHandle(0, rowkey_column_size))); + RowKeyValue end(true, std::make_shared(genMockCommonHandle(num_deleted_rows, rowkey_column_size))); + RowKeyRange range(start, end, true, rowkey_column_size); store->deleteRange(*db_context, db_context->getSettingsRef(), range); } // Read after deletion @@ -3369,28 +2793,20 @@ try /* keep_order= */ false, /* is_fast_mode= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - // Range after deletion is [64, 128) - DMTestEnv::verifyClusteredIndexValue( - c->operator[](i).get(), - i + Int64(num_deleted_rows), - rowkey_column_size); - } - } - } - } - - ASSERT_EQ(num_rows_read, num_rows_write - num_deleted_rows); + // mock common handle, data range after deletion is [64, 128) + auto common_handle_coldata = []() { + auto tmp = createNumbers(num_deleted_rows, num_rows_write); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + 
ASSERT_EQ(common_handle_coldata.size(), num_rows_write - num_deleted_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH @@ -3449,33 +2865,12 @@ try LOG_FMT_TRACE(&Poco::Logger::get(GET_GTEST_FULL_NAME), "start to check data of [1,{}]", num_rows_write_in_total); - size_t num_rows_read = 0; - in->readPrefix(); - Int64 expected_row_pk = 1; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - for (size_t i = 0; i < c->size(); ++i) - { - auto expected = expected_row_pk++; - auto value = c->getInt(i); - if (value != expected) - { - // Convenient for debug. - EXPECT_EQ(expected, value); - } - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write_in_total); - + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(1, num_rows_write_in_total + 1)), + })); LOG_FMT_TRACE(&Poco::Logger::get(GET_GTEST_FULL_NAME), "done checking data of [1,{}]", num_rows_write_in_total); } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp index 8cfa7c07642..3206f179349 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store_for_fast_mode.cpp @@ -14,22 +14,20 @@ #include #include +#include #include +#include +#include /// This test file is mainly test on the correctness of read in fast mode. /// Because the basic functions are tested in gtest_dm_delta_merge_storage.cpp, we will not cover it here. namespace DB { -namespace FailPoints -{ -} // namespace FailPoints - namespace DM { namespace tests { - TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithoutRangeFilter) { /// test under only insert data (no update, no delete) with all range @@ -92,35 +90,14 @@ TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithoutRangeFilter) /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_str_define.name) - { - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_str_define.name, col_i8_define.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createNumberStrings(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write)), + })); } } @@ -173,22 +150,23 @@ TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithRangeFilter) } { // read all columns from store with row key range in fast mode + auto read_nums_limit = 64; WriteBufferFromOwnString start_key_ss; DB::EncodeInt64(0, start_key_ss); WriteBufferFromOwnString end_key_ss; - DB::EncodeInt64(64, end_key_ss); - + DB::EncodeInt64(read_nums_limit, end_key_ss); const auto & columns = store->getTableColumns(); + RowKeyRanges key_ranges{RowKeyRange( + RowKeyValue(false, std::make_shared(start_key_ss.releaseStr()), /*int_val_*/ 0), + RowKeyValue(false, std::make_shared(end_key_ss.releaseStr()), /*int_val_*/ read_nums_limit), + false, + store->getRowKeyColumnSize())}; BlockInputStreamPtr in = store->read(*db_context, db_context->getSettingsRef(), columns, - {RowKeyRange( - RowKeyValue(false, std::make_shared(start_key_ss.releaseStr()), 0), - RowKeyValue(false, std::make_shared(end_key_ss.releaseStr()), 64), - false, - store->getRowKeyColumnSize())}, + key_ranges, /* num_streams= */ 1, /* max_version= */ std::numeric_limits::max(), EMPTY_FILTER, @@ -196,35 +174,14 @@ TEST_P(DeltaMergeStoreRWTest, TestFastModeWithOnlyInsertWithRangeFilter) /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - else if (iter.name == col_str_define.name) - { - EXPECT_EQ(c->getDataAt(i), DB::toString(i)); - } - else if (iter.name == col_i8_define.name) - { - Int64 num = i * (i % 2 == 0 ? 
-1 : 1); - EXPECT_EQ(c->getInt(i), num); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 64); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, col_str_define.name, col_i8_define.name}), + createColumns({ + createColumn(createNumbers(0, read_nums_limit)), + createColumn(createNumberStrings(0, read_nums_limit)), + createColumn(createSignedNumbers(0, read_nums_limit)), + })); } } @@ -292,83 +249,57 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_rows_read); - } - } - } - num_rows_read += block.rows(); - } + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, 3 * num_write_rows))})); break; } case TestMode::V2_FileOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_rows_read); - } - } - } - num_rows_read += block.rows(); - } + auto pk_coldata = []() { + std::vector res; + // first [0, 32) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [32, 64) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [64, 96) + tmp = createNumbers(2 * num_write_rows, 3 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } case TestMode::V2_Mix: { - int block_index = 0; - int begin_value = 0; // persist first, then memory, finally stable - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = num_write_rows * 2; - } - else if (block_index == 2) - { - begin_value = num_write_rows; - } - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + // persist first, then memory, finally stable + auto pk_coldata = []() { + std::vector res; + // first [0, 32) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [64, 96) + tmp = createNumbers(2 * num_write_rows, 3 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [32, 64) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } } - - in->readSuffix(); - ASSERT_EQ(num_rows_read, 3 * num_write_rows); } } CATCH @@ -439,82 +370,41 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: - { - while 
(Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - num_rows_read += block.rows(); - } - break; - } case TestMode::V2_FileOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_rows_read); - } - } - } - num_rows_read += block.rows(); - } + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, 3 * num_write_rows))})); break; } case TestMode::V2_Mix: { - int block_index = 0; - int begin_value = 0; - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = num_write_rows * 2; - } - else if (block_index == 2) - { - begin_value = num_write_rows; - } - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + auto pk_coldata = []() { + std::vector res; + // first [0, 32) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [64, 96) + tmp = createNumbers(2 * num_write_rows, 3 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [32, 64) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); break; } } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 3 * num_write_rows); } } CATCH @@ -564,7 +454,6 @@ try file_ids.insert(file_ids.cend(), file_ids3.begin(), file_ids3.end()); store->ingestFiles(dm_context, range, file_ids, false); store->write(*db_context, db_context->getSettingsRef(), block2); - break; } } @@ -587,84 +476,41 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: - { - while (Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - num_rows_read += block.rows(); - } - break; - } case TestMode::V2_FileOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_rows_read); - } - } - } - num_rows_read += block.rows(); - } + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, 3 * num_write_rows))})); break; } case TestMode::V2_Mix: { - int block_index = 0; - int begin_value = 0; - - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = num_write_rows * 2; - } - else if (block_index == 2) - { - begin_value = num_write_rows; - } - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == 
DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + auto pk_coldata = []() { + std::vector res; + // first [0, 32) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [64, 96) + tmp = createNumbers(2 * num_write_rows, 3 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [32, 64) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); break; } } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 3 * num_write_rows); } } CATCH @@ -739,27 +585,10 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, 3 * num_write_rows))})); } } CATCH @@ -835,109 +664,70 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (i < Int64(num_write_rows / 2)) - { - ASSERT_EQ(c->getInt(i), i); - } - else if (i < Int64(2.5 * num_write_rows)) - { - ASSERT_EQ(c->getInt(i), (i - num_write_rows / 2) / 2 + num_write_rows / 2); - } - else - { - ASSERT_EQ(c->getInt(i), (i - num_write_rows * 2) + num_write_rows); - } - } - } - } - num_rows_read += block.rows(); - } + auto pk_coldata = []() { + std::vector res; + // first [0, 128) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128, 256) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) + tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // the pk is sorted by flush cache + std::sort(res.begin(), res.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } case TestMode::V2_FileOnly: { - auto block_index = 0; - auto begin_value = 0; - - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = num_write_rows; - } - else if (block_index == 2) - { - begin_value = num_write_rows / 2; - } - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + auto pk_coldata = []() { + std::vector res; + // first [0, 128) + auto tmp = 
createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128, 256) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) + tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } case TestMode::V2_Mix: { - auto block_index = 0; - auto begin_value = num_write_rows; - - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = 0; - } - else if (block_index == 2) - { - begin_value = num_write_rows / 2; - } - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } - + auto pk_coldata = []() { + std::vector res; + // first [128, 256) + auto tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [0, 128) + tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) + tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } } - - in->readSuffix(); - ASSERT_EQ(num_rows_read, 3 * num_write_rows); } } CATCH @@ -1021,27 +811,11 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); // filter del mark = 1, thus just read the insert data before delete - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, num_rows_write))})); } store->flushCache(*db_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); @@ -1063,27 +837,10 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, num_rows_write))})); } } CATCH @@ -1095,7 +852,7 @@ try const size_t num_rows_write = 128; { // Create a block with sequential Int64 handle in range [0, 128) - Block block = DMTestEnv::prepareSimpleWriteBlock(0, 128, false); + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); switch (mode) { @@ -1126,25 +883,10 @@ try /* 
keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, num_rows_write))})); } // Delete range [0, 64) const size_t num_deleted_rows = 64; @@ -1166,28 +908,11 @@ try /* keep_order = */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - // filter del mark = 1, thus just read the insert data before delete - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, num_rows_write))})); } } CATCH @@ -1198,7 +923,7 @@ try const size_t num_rows_write = 128; { // Create a block with sequential Int64 handle in range [0, 128) - Block block = DMTestEnv::prepareSimpleWriteBlock(0, 128, false); + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); switch (mode) { @@ -1243,27 +968,12 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_deleted_rows); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, num_rows_write - num_deleted_rows); + auto pk_coldata = createNumbers(num_deleted_rows, num_rows_write); + ASSERT_EQ(pk_coldata.size(), num_rows_write - num_deleted_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); } } CATCH @@ -1346,105 +1056,70 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - in->readPrefix(); switch (mode) { case TestMode::V1_BlockOnly: case TestMode::V2_BlockOnly: { - while (Block block = in->read()) - { - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (i < Int64(num_write_rows / 2)) - { - ASSERT_EQ(c->getInt(i), i); - } - else if (i < Int64(2.5 * num_write_rows)) - { - ASSERT_EQ(c->getInt(i), (i - num_write_rows / 2) / 2 + num_write_rows / 2); - } - else - { - ASSERT_EQ(c->getInt(i), (i - num_write_rows * 2) + num_write_rows); - } - } - } - } - num_rows_read += block.rows(); - } + auto pk_coldata = []() { + std::vector res; + // first [0, 128) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128, 256) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) 
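+                // (with num_write_rows == 128 this is [64, 192), overlapping both ranges above)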
+ tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // the pk is sorted by flush cache + std::sort(res.begin(), res.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } case TestMode::V2_FileOnly: { - auto block_index = 0; - auto begin_value = 0; - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = num_write_rows; - } - else if (block_index == 2) - { - begin_value = num_write_rows / 2; - } - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + auto pk_coldata = []() { + std::vector res; + // first [0, 128) + auto tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128, 256) + tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) + tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } case TestMode::V2_Mix: { - auto block_index = 0; - auto begin_value = num_write_rows; - while (Block block = in->read()) - { - if (block_index == 1) - { - begin_value = 0; - } - else if (block_index == 2) - { - begin_value = num_write_rows / 2; - } - for (auto && iter : block) - { - if (iter.name == DMTestEnv::pk_name) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - ASSERT_EQ(c->getInt(i), i + begin_value); - } - } - } - num_rows_read += block.rows(); - block_index += 1; - } + auto pk_coldata = []() { + std::vector res; + // first [128, 256) + auto tmp = createNumbers(num_write_rows, 2 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [0, 128) + tmp = createNumbers(0, num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + // then [128/2, 128 * 1.5) + tmp = createNumbers(num_write_rows / 2, 1.5 * num_write_rows); + res.insert(res.end(), tmp.begin(), tmp.end()); + return res; + }(); + ASSERT_EQ(pk_coldata.size(), 3 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({DMTestEnv::pk_name}), createColumns({createColumn(pk_coldata)})); break; } } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 3 * num_write_rows); } // Read with version in normal case @@ -1461,27 +1136,10 @@ try /* keep_order= */ false, /* is_raw_read= */ false, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i + num_write_rows / 2); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, 1.5 * num_write_rows); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(num_write_rows / 2, 2 * num_write_rows))})); } } CATCH @@ -1530,27 +1188,12 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* 
expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto && iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); ++i) - { - if (iter.name == DMTestEnv::pk_name) - { - ASSERT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } // Delete range [0, 64) @@ -1570,7 +1213,7 @@ try { const auto & columns = store->getTableColumns(); ColumnDefines real_columns; - for (auto & col : columns) + for (const auto & col : columns) { if (col.name != EXTRA_HANDLE_COLUMN_NAME) { @@ -1589,19 +1232,10 @@ try /* keep_order= */ false, /* is_raw_read= */ true, /* expected_block_size= */ 1024)[0]; - size_t num_rows_read = 0; - - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - - ASSERT_EQ(num_rows_read, num_rows_write - num_deleted_rows); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write - num_deleted_rows); } } CATCH } // namespace tests } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp index 23062f4ffdf..4b29e17ec23 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_file.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -23,7 +24,10 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -38,7 +42,7 @@ namespace DM { namespace tests { -TEST(DMFileWriterFlags_test, SetClearFlags) +TEST(DMFileWriterFlagsTest, SetClearFlags) { using Flags = DMFileWriter::Flags; @@ -83,12 +87,12 @@ String paramToString(const ::testing::TestParamInfo & info) using DMFileBlockOutputStreamPtr = std::shared_ptr; using DMFileBlockInputStreamPtr = std::shared_ptr; -class DMFile_Test +class DMFileTest : public DB::base::TiFlashStorageTestBasic , public testing::WithParamInterface { public: - DMFile_Test() + DMFileTest() : dm_file(nullptr) {} @@ -103,7 +107,7 @@ class DMFile_Test auto configuration = (mode == DMFileMode::DirectoryChecksum ? 
std::make_optional() : std::nullopt); parent_path = TiFlashStorageTestBasic::getTemporaryPath(); - path_pool = std::make_unique(db_context->getPathPool().withTable("test", "DMFile_Test", false)); + path_pool = std::make_unique(db_context->getPathPool().withTable("test", "DMFileTest", false)); storage_pool = std::make_unique(*db_context, /*ns_id*/ 100, *path_pool, "test.t1"); dm_file = DMFile::create(1, parent_path, single_file_mode, std::move(configuration)); table_columns_ = std::make_shared(); @@ -160,7 +164,7 @@ class DMFile_Test }; -TEST_P(DMFile_Test, WriteRead) +TEST_P(DMFileTest, WriteRead) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -198,23 +202,12 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } /// Test restore the file from disk and read @@ -237,28 +230,17 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } } CATCH -TEST_P(DMFile_Test, GcFlag) +TEST_P(DMFileTest, GcFlag) try { // clean @@ -319,10 +301,10 @@ try } CATCH -/// DMFile_Test.InterruptedDrop_0 and InterruptedDrop_1 test that if deleting file +/// DMFileTest.InterruptedDrop_0 and InterruptedDrop_1 test that if deleting file /// is interrupted by accident, we can safely ignore those broken files. 
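The hand-written readPrefix()/read()/readSuffix() loops deleted in these hunks all follow one shape: drain the stream block by block, compare a named column cell by cell, then check the total row count. ASSERT_INPUTSTREAM_COLS_UR collapses that shape into a single assertion against fully materialized expected columns. A minimal sketch of the idea, assuming only the stream interface visible in the deleted code; the helper name and signature below are illustrative, not the actual TestUtils implementation, which also produces richer gtest diagnostics (and, judging by the UR suffix, likely does not restrict row order, while this sketch compares in stream order):

// Illustrative sketch only -- not the real ASSERT_INPUTSTREAM_COLS_UR.
// Assumes DB::Block / DB::Int64 / DB::String and the IBlockInputStream-style
// interface (readPrefix/read/readSuffix) shown in the loops this patch removes.
template <typename StreamPtr>
void checkInt64StreamColumn(const StreamPtr & stream, const String & col_name, const std::vector<Int64> & expected)
{
    size_t num_rows_read = 0;
    stream->readPrefix();
    while (Block in = stream->read())
    {
        ASSERT_TRUE(in.has(col_name));
        const auto & c = in.getByName(col_name).column;
        for (size_t i = 0; i < c->size(); ++i)
        {
            // Guard before indexing, then compare in stream order.
            ASSERT_LT(num_rows_read + i, expected.size());
            EXPECT_EQ(c->getInt(i), expected[num_rows_read + i]);
        }
        num_rows_read += in.rows();
    }
    stream->readSuffix();
    // Subsumes the separate ASSERT_EQ(num_rows_read, num_rows_write) checks.
    ASSERT_EQ(num_rows_read, expected.size());
}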
-TEST_P(DMFile_Test, InterruptedDrop_0) +TEST_P(DMFileTest, InterruptedDrop0) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -349,23 +331,12 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } FailPointHelper::enableFailPoint(FailPoints::exception_before_dmfile_remove_encryption); @@ -388,7 +359,7 @@ try } CATCH -TEST_P(DMFile_Test, InterruptedDrop_1) +TEST_P(DMFileTest, InterruptedDrop1) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -415,23 +386,12 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + })); } FailPointHelper::enableFailPoint(FailPoints::exception_before_dmfile_remove_from_disk); @@ -456,7 +416,7 @@ CATCH /// Test reading rows with some filters -TEST_P(DMFile_Test, ReadFilteredByHandle) +TEST_P(DMFileTest, ReadFilteredByHandle) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -482,13 +442,14 @@ try stream->writeSuffix(); } - HandleRanges ranges; - ranges.emplace_back(HandleRange{0, span_per_part}); // only first part - ranges.emplace_back(HandleRange{800, num_rows_write}); - ranges.emplace_back(HandleRange{256, 700}); // - ranges.emplace_back(HandleRange::newNone()); // none - ranges.emplace_back(HandleRange{0, num_rows_write}); // full range - ranges.emplace_back(HandleRange::newAll()); // full range + HandleRanges ranges{ + HandleRange{0, span_per_part}, // only first part + HandleRange{800, num_rows_write}, + HandleRange{256, 700}, // + HandleRange::newNone(), // none + HandleRange{0, num_rows_write}, // full range + HandleRange::newAll(), // full range + }; auto test_read_range = [&](const HandleRange & range) { // Test read DMFileBlockInputStreamBuilder builder(dbContext()); @@ -496,29 +457,17 @@ try .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::fromHandleRange(range)}); // Filtered by read_range - Int64 num_rows_read = 0; - stream->readPrefix(); Int64 expect_first_pk = int(std::floor(std::max(0, range.start) / span_per_part)) * span_per_part; Int64 expect_last_pk = std::min(num_rows_write, // int(std::ceil(std::min(num_rows_write, range.end) / span_per_part)) * span_per_part + (range.end % span_per_part ? 
span_per_part : 0)); - Int64 cur_pk = expect_first_pk; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++) - << "range: " << range.toDebugString() << ", cur_pk: " << cur_pk << ", first pk: " << expect_first_pk; - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, expect_last_pk - expect_first_pk) // - << "range: " << range.toDebugString() // - << ", first: " << expect_first_pk << ", last: " << expect_last_pk; + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(expect_first_pk, expect_last_pk)), + })) + << fmt::format("range: {}, first: {}, last: {}", range.toDebugString(), expect_first_pk, expect_last_pk); }; for (const auto & range : ranges) @@ -548,7 +497,7 @@ RSOperatorPtr toRSFilter(const ColumnDefine & cd, const HandleRange & range) } } // namespace -TEST_P(DMFile_Test, ReadFilteredByRoughSetFilter) +TEST_P(DMFileTest, ReadFilteredByRoughSetFilter) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -583,13 +532,14 @@ try stream->writeSuffix(); } - HandleRanges ranges; - ranges.emplace_back(HandleRange{0, span_per_part}); // only first part - ranges.emplace_back(HandleRange{800, num_rows_write}); - ranges.emplace_back(HandleRange{256, 700}); // - ranges.emplace_back(HandleRange::newNone()); // none - ranges.emplace_back(HandleRange{0, num_rows_write}); // full range - ranges.emplace_back(HandleRange::newAll()); // full range + HandleRanges ranges{ + HandleRange{0, span_per_part}, // only first part + HandleRange{800, num_rows_write}, + HandleRange{256, 700}, // + HandleRange::newNone(), // none + HandleRange{0, num_rows_write}, // full range + HandleRange::newAll(), // full range + }; auto test_read_filter = [&](const HandleRange & range) { // Filtered by rough set filter auto filter = toRSFilter(i64_cd, range); @@ -600,29 +550,17 @@ try .setRSOperator(filter) // Filtered by rough set filter .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - Int64 num_rows_read = 0; - stream->readPrefix(); Int64 expect_first_pk = int(std::floor(std::max(0, range.start) / span_per_part)) * span_per_part; Int64 expect_last_pk = std::min(num_rows_write, // int(std::ceil(std::min(num_rows_write, range.end) / span_per_part)) * span_per_part + (range.end % span_per_part ? 
span_per_part : 0)); - Int64 cur_pk = expect_first_pk; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(i64_cd.name)); - auto col = in.getByName(i64_cd.name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++) - << "range: " << range.toDebugString() << ", cur_pk: " << cur_pk << ", first pk: " << expect_first_pk; - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, expect_last_pk - expect_first_pk) // - << "range: " << range.toDebugString() // - << ", first: " << expect_first_pk << ", last: " << expect_last_pk; + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(expect_first_pk, expect_last_pk)), + })) + << fmt::format("range: {}, first: {}, last: {}", range.toDebugString(), expect_first_pk, expect_last_pk); }; for (const auto & range : ranges) @@ -642,7 +580,7 @@ try CATCH // Test rough filter with some unsupported operations -TEST_P(DMFile_Test, ReadFilteredByRoughSetFilterWithUnsupportedOperation) +TEST_P(DMFileTest, ReadFilteredByRoughSetFilterWithUnsupportedOperation) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -695,25 +633,15 @@ try .setRSOperator(filter) // Filtered by rough set filter .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - Int64 num_rows_read = 0; - stream->readPrefix(); Int64 expect_first_pk = 0; Int64 expect_last_pk = num_rows_should_read; - Int64 cur_pk = expect_first_pk; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(i64_cd.name)); - auto col = in.getByName(i64_cd.name); - auto & c = col.column; - for (size_t j = 0; j < c->size(); j++) - { - EXPECT_EQ(c->getInt(j), cur_pk++) << "cur_pk: " << cur_pk << ", first pk: " << expect_first_pk; - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, expect_last_pk - expect_first_pk) // - << "first: " << expect_first_pk << ", last: " << expect_last_pk; + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(expect_first_pk, expect_last_pk)), + })) + << fmt::format("first: {}, last: {}", expect_first_pk, expect_last_pk); }; for (size_t i = 0; i < filters.size(); ++i) @@ -736,7 +664,7 @@ try } CATCH -TEST_P(DMFile_Test, ReadFilteredByPackIndices) +TEST_P(DMFileTest, ReadFilteredByPackIndices) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -780,9 +708,8 @@ try .setReadPacks(id_set_ptr) // filter by pack index .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - Int64 num_rows_read = 0; - stream->readPrefix(); - Int64 expect_first_pk = 0, expect_last_pk = 0; + Int64 expect_first_pk = 0; + Int64 expect_last_pk = 0; if (id_set_ptr && !id_set_ptr->empty()) { expect_first_pk = *(id_set_ptr->begin()) * span_per_part; @@ -794,24 +721,13 @@ try // not filter if it is nullptr expect_last_pk = num_rows_write; } - - Int64 cur_pk = expect_first_pk; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), cur_pk++) // - << "test index: " << test_index // - << ", cur_pk: " << cur_pk << ", first pk: " << expect_first_pk; - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, expect_last_pk - expect_first_pk) // - << "test index: " << test_index << ", first: " << expect_first_pk << ", last: " << expect_last_pk; + 
ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(createNumbers(expect_first_pk, expect_last_pk)), + })) + << fmt::format("test index: {}, first: {}, last: {}", test_index, expect_first_pk, expect_last_pk); }; for (size_t test_index = 0; test_index <= test_sets.size(); test_index++) { @@ -831,7 +747,7 @@ CATCH /// Test reading different column types -TEST_P(DMFile_Test, NumberTypes) +TEST_P(DMFileTest, NumberTypes) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -870,37 +786,24 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(i64_col.name)); - ASSERT_TRUE(in.has(f64_col.name)); - auto i64_c = in.getByName(i64_col.name).column; - auto f64_c = in.getByName(f64_col.name).column; - ASSERT_EQ(i64_c->size(), f64_c->size()); - for (size_t i = 0; i < i64_c->size(); i++) - { - EXPECT_EQ(i64_c->getInt(i), cur_pk++); - Field f = (*f64_c)[i]; - EXPECT_FLOAT_EQ(f.get(), 0.125); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name, i64_col.name, f64_col.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 0.125)), + })); } } CATCH -TEST_P(DMFile_Test, StringType) +TEST_P(DMFileTest, StringType) try { auto cols = DMTestEnv::getDefaultColumns(); // Prepare columns - ColumnDefine fixed_str_col(2, "str", typeFromString("FixedString(5)")); + ColumnDefine fixed_str_col(2, "str", typeFromString("String")); cols->push_back(fixed_str_col); reload(cols); @@ -929,28 +832,18 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(fixed_str_col.name)); - auto col = in.getByName(fixed_str_col.name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - EXPECT_EQ(value.get(), "hello"); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name, fixed_str_col.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, "hello")), + })); } } CATCH -TEST_P(DMFile_Test, NullableType) +TEST_P(DMFileTest, NullableType) try { auto cols = DMTestEnv::getDefaultColumns(); @@ -966,7 +859,7 @@ try Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); // Half of the column are filled by NULL auto col = nullable_col.type->createColumn(); - for (size_t i = 0; i < 64; i++) + for (size_t i = 0; i < num_rows_write / 2; i++) col->insert(toField(Int64(i))); for (size_t i = 64; i < num_rows_write; i++) col->insertDefault(); @@ -990,59 +883,35 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - ASSERT_TRUE(in.has(nullable_col.name)); - auto col = in.getByName(DMTestEnv::pk_name); - 
auto & c = col.column; - auto ncol = in.getByName(nullable_col.name); - auto & nc = ncol.column; - for (size_t i = 0; i < c->size(); i++) - { - // check nullable column - { - const auto nested_col = typeid_cast(nc.get()); - auto nested = nested_col->getNestedColumnPtr(); - if (cur_pk < 64) - { - EXPECT_FALSE(nested_col->isNullAt(i)); - EXPECT_EQ(nested->getInt(i), cur_pk); - } - else - { - EXPECT_TRUE(nested_col->isNullAt(i)); - } - } - // check pk - EXPECT_EQ(c->getInt(i), cur_pk++); - } - num_rows_read += in.rows(); - } - ASSERT_EQ(num_rows_read, num_rows_write); - stream->readSuffix(); + std::vector nullable_coldata = createNumbers(0, num_rows_write / 2); + nullable_coldata.resize(num_rows_write); + std::vector null_map(num_rows_write, 0); + std::fill(null_map.begin() + num_rows_write / 2, null_map.end(), 1); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name, nullable_col.name}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createNullableColumn(nullable_coldata, null_map), + })); } } CATCH INSTANTIATE_TEST_CASE_P(DTFileMode, // - DMFile_Test, + DMFileTest, testing::Values(DMFileMode::SingleFile, DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), paramToString); /// DMFile test for clustered index -class DMFile_Clustered_Index_Test : public DB::base::TiFlashStorageTestBasic - , // - public testing::WithParamInterface +class DMFileClusteredIndexTest + : public DB::base::TiFlashStorageTestBasic + , public testing::WithParamInterface { public: - DMFile_Clustered_Index_Test() + DMFileClusteredIndexTest() : dm_file(nullptr) {} @@ -1107,7 +976,7 @@ class DMFile_Clustered_Index_Test : public DB::base::TiFlashStorageTestBasic size_t rowkey_column_size = 2; }; -TEST_P(DMFile_Clustered_Index_Test, WriteRead) +TEST_P(DMFileClusteredIndexTest, WriteRead) try { auto cols = DMTestEnv::getDefaultColumns(is_common_handle ? DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID); @@ -1150,28 +1019,25 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 cur_pk = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - DMTestEnv::verifyClusteredIndexValue((*c)[i].get(), cur_pk++, rowkey_column_size); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + // mock common handle + auto common_handle_coldata = [this]() { + std::vector int_coldata = createNumbers(0, num_rows_write); + Strings res; + std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH -TEST_P(DMFile_Clustered_Index_Test, ReadFilteredByHandle) +TEST_P(DMFileClusteredIndexTest, ReadFilteredByHandle) try { auto cols = DMTestEnv::getDefaultColumns(is_common_handle ? 
DMTestEnv::PkType::CommonHandle : DMTestEnv::PkType::HiddenTiDBRowID); @@ -1237,41 +1103,37 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols, RowKeyRanges{range.range}); // Filtered by read_range - - Int64 num_rows_read = 0; - stream->readPrefix(); Int64 expect_first_pk = int(std::floor(std::max(0, range.start) / span_per_part)) * span_per_part; Int64 expect_last_pk = std::min(num_rows_write, // int(std::ceil(std::min(num_rows_write, range.end) / span_per_part)) * span_per_part + (range.end % span_per_part ? span_per_part : 0)); - Int64 cur_pk = expect_first_pk; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(DMTestEnv::pk_name)); - auto col = in.getByName(DMTestEnv::pk_name); - auto & c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - DMTestEnv::verifyClusteredIndexValue((*c)[i].get(), cur_pk++, rowkey_column_size); - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, expect_last_pk - expect_first_pk) // - << "range: " << range.range.toDebugString() // - << ", first: " << expect_first_pk << ", last: " << expect_last_pk; + // mock common handle + auto common_handle_coldata = [this, expect_first_pk, expect_last_pk]() { + std::vector int_coldata = createNumbers(expect_first_pk, expect_last_pk); + Strings res; + std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), expect_last_pk - expect_first_pk); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })) + << fmt::format("range: {}, first: {}, last: {}", range.range.toDebugString(), expect_first_pk, expect_last_pk); } } CATCH INSTANTIATE_TEST_CASE_P(DTFileMode, // - DMFile_Clustered_Index_Test, + DMFileClusteredIndexTest, testing::Values(DMFile::Mode::FOLDER, DMFile::Mode::SINGLE_FILE), paramToString); /// DDL test cases -class DMFile_DDL_Test : public DMFile_Test +class DMFileDDLTest : public DMFileTest { public: /// Write some data into DMFile. 
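The expected columns in these hunks are built from two small generators. Their behavior can be read off the checks the patch deletes: createNumbers(beg, end) is the consecutive sequence [beg, end), while createSignedNumbers matches the `(i % 2 == 0 ? -1 : 1) * i` expectations removed from the segment tests further below. A hedged sketch, with the Int64 element type and exact signatures assumed rather than taken from the shared test utilities:

// Plausible definitions, reconstructed from the deleted assertions.
inline std::vector<Int64> createNumbers(Int64 beg, Int64 end)
{
    // [beg, end): replaces loops asserting getInt(i) == i.
    std::vector<Int64> v;
    v.reserve(static_cast<size_t>(end - beg));
    for (Int64 i = beg; i < end; ++i)
        v.push_back(i);
    return v;
}

inline std::vector<Int64> createSignedNumbers(size_t beg, size_t end)
{
    // Alternating signs: 0, 1, -2, 3, -4, ... as in the removed
    // `(i % 2 == 0 ? -1 : 1) * i` expectations.
    std::vector<Int64> v;
    v.reserve(end - beg);
    for (size_t i = beg; i < end; ++i)
        v.push_back(static_cast<Int64>(i) * (i % 2 == 0 ? -1 : 1));
    return v;
}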
@@ -1291,20 +1153,18 @@ class DMFile_DDL_Test : public DMFile_Test { // Prepare write Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - - auto col = i8_col.type->createColumn(); - for (size_t i = 0; i < num_rows_write; i++) + if (!i8_is_nullable) { - Field field; // Null by default - if (!i8_is_nullable || (i8_is_nullable && i < num_rows_write / 2)) - field = toField(Int64(i) * (-1 * (i % 2))); - col->insert(field); + auto i8_coldata = createSignedNumbers(0, num_rows_write); + block.insert(createColumn(i8_coldata, i8_col.name, i8_col.id)); + } + else + { + auto c = getExpectedI8Column(num_rows_write); + c.name = i8_col.name; + c.column_id = i8_col.id; + block.insert(c); } - block.insert(ColumnWithTypeAndName{ - std::move(col), - i8_col.type, - i8_col.name, - i8_col.id}); block.insert(DB::tests::createColumn( std::vector(num_rows_write, 0.125), @@ -1320,9 +1180,21 @@ class DMFile_DDL_Test : public DMFile_Test return {num_rows_write, *cols_before_ddl}; } } + + static ColumnWithTypeAndName getExpectedI8Column(size_t num_rows_write) + { + auto i8_coldata = createSignedNumbers(0, num_rows_write); + std::vector nullmap(num_rows_write, 0); + for (size_t i = 0; i < num_rows_write / 2; ++i) + { + i8_coldata[i] = 0; + nullmap[i] = 1; + } + return createNullableColumn(i8_coldata, nullmap); + } }; -TEST_P(DMFile_DDL_Test, AddColumn) +TEST_P(DMFileDDLTest, AddColumn) try { // Prepare some data before ddl @@ -1345,71 +1217,22 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols_after_ddl, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 row_number = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has("i8")); - ASSERT_TRUE(in.has("f64")); - ASSERT_TRUE(in.has(new_s_col.name)); - ASSERT_TRUE(in.has(new_i_col_with_default.name)); - { - auto col = in.getByName(new_s_col.name); - EXPECT_EQ(col.column_id, new_s_col.id); - EXPECT_TRUE(col.type->equals(*new_s_col.type)); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - ASSERT_EQ(value.getType(), Field::Types::String); - // Empty default value - ASSERT_EQ(value, new_s_col.type->getDefault()); - } - } - { - auto col = in.getByName(new_i_col_with_default.name); - EXPECT_EQ(col.column_id, new_i_col_with_default.id); - EXPECT_TRUE(col.type->equals(*new_i_col_with_default.type)); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - ASSERT_EQ(c->getInt(i), 5); // Should fill with default value - } - } - { - // Check old columns before ddl - auto col = in.getByName("i8"); - EXPECT_EQ(col.column_id, 2L); - EXPECT_TRUE(col.type->equals(*typeFromString("Int8"))); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - EXPECT_EQ(c->getInt(i), Int64(row_number * (-1 * (row_number % 2)))); - row_number++; - } - } - { - auto col = in.getByName("f64"); - EXPECT_EQ(col.column_id, 3L); - EXPECT_TRUE(col.type->equals(*typeFromString("Float64"))); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - EXPECT_FLOAT_EQ(value.get(), 0.125); - } - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({"i8", "f64", new_s_col.name, new_i_col_with_default.name}), + createColumns({ + // old cols + createColumn(createSignedNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, 0.125)), + // new cols + 
createColumn(Strings(num_rows_write, "")), // filled with empty + createColumn(std::vector(num_rows_write, 5)), // filled with default value + })); } } CATCH -TEST_P(DMFile_DDL_Test, UpcastColumnType) +TEST_P(DMFileDDLTest, UpcastColumnType) try { // Prepare some data before ddl const auto [num_rows_write, cols_before_ddl] = prepareSomeDataToDMFile(); @@ -1431,56 +1254,26 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols_after_ddl, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 row_number = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(new_col.name)); - ASSERT_TRUE(!in.has("i8")); - ASSERT_TRUE(in.has("f64")); - { - auto col = in.getByName(new_col.name); - EXPECT_EQ(col.column_id, new_col.id); - EXPECT_TRUE(col.type->equals(*new_col.type)); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - auto value = c->getInt(Int64(i)); - ASSERT_EQ(value, (Int64)(row_number * (-1 * (row_number % 2)))); - row_number++; - } - } - { - // Check old columns before ddl - auto col = in.getByName("f64"); - EXPECT_EQ(col.column_id, 3L); - EXPECT_TRUE(col.type->equals(*typeFromString("Float64"))); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - EXPECT_DOUBLE_EQ(value.get(), 0.125); - } - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({new_col.name, "f64"}), + createColumns({ + createColumn(createSignedNumbers(0, num_rows_write)), + // old cols + createColumn(std::vector(num_rows_write, 0.125)), + })); } } CATCH -TEST_P(DMFile_DDL_Test, NotNullToNull) +TEST_P(DMFileDDLTest, NotNullToNull) try { // Prepare some data before ddl const auto [num_rows_write, cols_before_ddl] = prepareSomeDataToDMFile(); // Mock that we change a column type from int8 -> Nullable(int32), and its name to "i8_new" after ddl - auto cols_after_ddl = std::make_shared(); - *cols_after_ddl = cols_before_ddl; + auto cols_after_ddl = std::make_shared(cols_before_ddl); const ColumnDefine old_col = cols_before_ddl[3]; ASSERT_TRUE(old_col.type->equals(*typeFromString("Int8"))); ColumnDefine new_col = old_col; @@ -1494,48 +1287,18 @@ try auto stream = builder .setColumnCache(column_cache_) .build(dm_file, *cols_after_ddl, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 row_number = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(new_col.name)); - ASSERT_TRUE(!in.has("i8")); - ASSERT_TRUE(in.has("f64")); - { - auto col = in.getByName(new_col.name); - EXPECT_EQ(col.column_id, new_col.id); - EXPECT_TRUE(col.type->equals(*new_col.type)); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - auto value = (*c)[i]; - ASSERT_FALSE(value.isNull()); - ASSERT_EQ(value, (Int64)(row_number * (-1 * (row_number % 2)))); - row_number++; - } - } - { - auto col = in.getByName("f64"); - EXPECT_EQ(col.column_id, 3L); - EXPECT_TRUE(col.type->equals(*typeFromString("Float64"))); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - EXPECT_DOUBLE_EQ(value.get(), 0.125); - } - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({new_col.name, "f64"}), + createColumns({ + createNullableColumn(createSignedNumbers(0, num_rows_write), /*null_map=*/std::vector(num_rows_write, 0)), + createColumn(std::vector(num_rows_write, 0.125)), + })); }
} CATCH -TEST_P(DMFile_DDL_Test, NullToNotNull) +TEST_P(DMFileDDLTest, NullToNotNull) try { // Prepare some data before ddl @@ -1558,57 +1321,23 @@ try .setColumnCache(column_cache_) .build(dm_file, *cols_after_ddl, RowKeyRanges{RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - stream->readPrefix(); - Int64 row_number = 0; - while (Block in = stream->read()) - { - ASSERT_TRUE(in.has(new_col.name)); - ASSERT_TRUE(!in.has("i8")); - ASSERT_TRUE(in.has("f64")); - { - auto col = in.getByName(new_col.name); - EXPECT_EQ(col.column_id, new_col.id); - EXPECT_TRUE(col.type->equals(*new_col.type)); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - auto value = (*c)[i]; - if (i < num_rows_write / 2) - { - ASSERT_FALSE(value.isNull()) << " at row: " << i; - ASSERT_EQ(value, (Int64)(row_number * (-1 * (row_number % 2)))) << " at row: " << i; - } - else - { - ASSERT_FALSE(value.isNull()) << " at row: " << i; - ASSERT_EQ(value, (Int64)0) << " at row: " << i; - } - row_number++; - } - } - { - // Check old columns before ddl - auto col = in.getByName("f64"); - EXPECT_EQ(col.column_id, 3L); - EXPECT_TRUE(col.type->equals(*typeFromString("Float64"))); - auto c = col.column; - for (size_t i = 0; i < c->size(); i++) - { - Field value = (*c)[i]; - EXPECT_DOUBLE_EQ(value.get(), 0.125); - } - } - num_rows_read += in.rows(); - } - stream->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + auto i32_coldata = createSignedNumbers(0, num_rows_write); + for (size_t i = 0; i < num_rows_write / 2; ++i) + i32_coldata[i] = 0; + ASSERT_INPUTSTREAM_COLS_UR( + stream, + Strings({DMTestEnv::pk_name, new_col.name, "f64"}), + createColumns({ + createColumn(createNumbers(0, num_rows_write)), + createColumn(i32_coldata), + createColumn(std::vector(num_rows_write, 0.125)), + })); } } CATCH INSTANTIATE_TEST_CASE_P(DTFileMode, // - DMFile_DDL_Test, + DMFileDDLTest, testing::Values(DMFileMode::SingleFile, DMFileMode::DirectoryLegacy, DMFileMode::DirectoryChecksum), paramToString); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp index f066237d8e2..345ecff9286 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_minmax_index.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -125,9 +126,7 @@ bool checkMatch( const ColumnDefine & col_to_read = check_pk ? 
getExtraHandleColumnDefine(is_common_handle) : cd; auto streams = store->read(context, context.getSettingsRef(), {col_to_read}, {all_range}, 1, std::numeric_limits::max(), filter, name, false); - streams[0]->readPrefix(); - auto rows = streams[0]->read().rows(); - streams[0]->readSuffix(); + auto rows = getInputStreamNRows(streams[0]); store->drop(); return rows != 0; diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index deec5646d33..f27abf157dc 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -139,14 +140,7 @@ try { // read written data (only in delta) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -157,14 +151,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } } @@ -180,14 +167,7 @@ try { // read written data (both in delta and stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write + num_rows_write_2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write + num_rows_write_2); } { @@ -198,14 +178,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write + num_rows_write_2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write + num_rows_write_2); } } } @@ -235,15 +208,8 @@ try { auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); // only write two visible pks - ASSERT_EQ(num_rows_read, 2); + ASSERT_INPUTSTREAM_NROWS(in, 2); } } CATCH @@ -278,14 +244,7 @@ try { // read written data (only in delta) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows); } { @@ -298,14 +257,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows); + 
ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows); } } @@ -323,14 +275,7 @@ try { // read written data (both in delta and stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows_2); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows_2); } { @@ -342,14 +287,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows_2); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows_2); } } } @@ -372,18 +310,6 @@ try segment->write(dmContext(), block); }; - auto check_rows = [&](size_t expected_rows) { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expected_rows); - }; - { // check segment segment->check(dmContext(), "test"); @@ -391,12 +317,16 @@ try // Thread A write_rows(100); - check_rows(100); + ASSERT_INPUTSTREAM_NROWS( + segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}), + 100); auto snap = segment->createSnapshot(dmContext(), false, CurrentMetrics::DT_SnapshotOfRead); // Thread B write_rows(100); - check_rows(200); + ASSERT_INPUTSTREAM_NROWS( + segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}), + 200); // Thread A { @@ -408,14 +338,7 @@ try {}, std::numeric_limits::max(), DEFAULT_BLOCK_SIZE); - int num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, 100); + ASSERT_INPUTSTREAM_NROWS(in, 100); } } CATCH @@ -444,15 +367,7 @@ try auto get_rows = [&](const RowKeyRange & range) { auto in = segment->getInputStream(dmContext(), *tableColumns(), {range}); - in->readPrefix(); - size_t rows = 0; - while (Block block = in->read()) - { - rows += block.rows(); - } - in->readSuffix(); - - return rows; + return getInputStreamNRows(in); }; // First place the block packs, so that we can only place DeleteRange below. 
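Where a test only cares about the row count, the patch switches to getInputStreamNRows and ASSERT_INPUTSTREAM_NROWS, as in the get_rows lambda above. The counting helper can be reconstructed from the deleted loops; the template signature here is an assumption, but the behavior mirrors the removed code exactly:

// Sketch of the counting helper behind ASSERT_INPUTSTREAM_NROWS.
template <typename StreamPtr>
size_t getInputStreamNRows(const StreamPtr & in)
{
    size_t num_rows_read = 0;
    in->readPrefix();
    while (Block block = in->read())
        num_rows_read += block.rows(); // drain the stream, counting rows only
    in->readSuffix();
    return num_rows_read;
}

// ASSERT_INPUTSTREAM_NROWS(in, n) then boils down to
//   ASSERT_EQ(getInputStreamNRows(in), static_cast<size_t>(n));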
@@ -522,14 +437,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -550,21 +458,10 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(1), 99); - } - } - } - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn({0, 99})})); } // For the case that apply merge delta after delete range, we ensure that data on disk are compacted @@ -572,15 +469,8 @@ try { // read raw after delete range auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); - in->readPrefix(); - size_t num_rows = 0; - while (Block block = in->read()) - { - num_rows += block.rows(); - } - in->readSuffix(); // Only 2 rows are left on disk, others are compacted. - ASSERT_EQ(num_rows, 2UL); + ASSERT_INPUTSTREAM_NROWS(in, 2); } } CATCH @@ -600,14 +490,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -636,21 +519,10 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(1), 99); - } - } - } - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn({0, 99})})); } // For the case that apply merge delta after delete range, we ensure that data on disk are compacted @@ -658,15 +530,8 @@ try { // read raw after delete range auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); - in->readPrefix(); - size_t num_rows = 0; - while (Block block = in->read()) - { - num_rows += block.rows(); - } - in->readSuffix(); // Only 2 rows are left on disk, others are compacted. 
- ASSERT_EQ(num_rows, 2UL); + ASSERT_INPUTSTREAM_NROWS(in, 2); } } CATCH @@ -695,14 +560,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -723,21 +581,10 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(1), 99); - } - } - } - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn({0, 99})})); } // For the case that apply merge delta after delete range, we ensure that data on disk are compacted @@ -745,15 +592,8 @@ try { // read raw after delete range auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); - size_t num_rows = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows += block.rows(); - } - in->readSuffix(); // Only 2 rows are left on disk, others are compacted. - ASSERT_EQ(num_rows, 2UL); + ASSERT_INPUTSTREAM_NROWS(in, 2); } } CATCH @@ -795,23 +635,10 @@ try // Read after deletion // The deleted range has no overlap with current data, so there should be no change auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write); - for (auto & iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); i++) - { - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(i), i); - } - } - } - } - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, num_rows_write))})); } { @@ -828,22 +655,10 @@ try // Read after deletion // The deleted range has overlap range [63, 64) with current data, so the record with Handle 63 should be deleted auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 1); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(62), 62); - } - EXPECT_EQ(c->size(), 63UL); - } - } - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(createNumbers(0, 63))})); } { @@ -858,21 +673,14 @@ try { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 32); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(1), 32); - } - } - } - in->readSuffix(); + + std::vector pk_coldata{0}; + auto tmp = createNumbers(32, 63); + pk_coldata.insert(pk_coldata.end(), tmp.begin(), tmp.end()); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), 
+ createColumns({createColumn(pk_coldata)})); } { @@ -888,21 +696,13 @@ try { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 32); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 0); - EXPECT_EQ(c->getInt(1), 32); - } - } - } - in->readSuffix(); + std::vector pk_coldata{0}; + auto tmp = createNumbers(32, 63); + pk_coldata.insert(pk_coldata.end(), tmp.begin(), tmp.end()); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); } { @@ -918,20 +718,11 @@ try { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 33); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 32); - } - } - } - in->readSuffix(); + std::vector pk_coldata = createNumbers(32, 63); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); } { @@ -945,23 +736,14 @@ try { // Read after new write auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 33 + 7); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - EXPECT_EQ(c->getInt(0), 9); - EXPECT_EQ(c->getInt(6), 15); - EXPECT_EQ(c->getInt(7), 32); - EXPECT_EQ(c->getInt(block.rows() - 1), 62); - } - } - } - in->readSuffix(); + std::vector pk_coldata = createNumbers(9, 16); + auto tmp = createNumbers(32, 63); + pk_coldata.insert(pk_coldata.end(), tmp.begin(), tmp.end()); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(pk_coldata)})); } } CATCH @@ -985,15 +767,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } const auto old_range = segment->getRowKeyRange(); @@ -1011,29 +785,9 @@ try EXPECT_EQ(*s2_range.end.value, *old_range.end.value); // TODO check segment epoch is increase - size_t num_rows_seg1 = 0; - size_t num_rows_seg2 = 0; - { - { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_seg1 += block.rows(); - } - in->readSuffix(); - } - { - auto in = new_segment->getInputStream(dmContext(), *tableColumns(), {new_segment->getRowKeyRange()}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_seg2 += block.rows(); - } - in->readSuffix(); - } - ASSERT_EQ(num_rows_seg1 + num_rows_seg2, num_rows_write); - } + size_t num_rows_seg1 = getInputStreamNRows(segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()})); + size_t num_rows_seg2 = getInputStreamNRows(new_segment->getInputStream(dmContext(), *tableColumns(), {new_segment->getRowKeyRange()})); + 
ASSERT_EQ(num_rows_seg1 + num_rows_seg2, num_rows_write); // delete rows in the right segment { @@ -1052,15 +806,8 @@ try // TODO check segment epoch is increase } { - size_t num_rows_read = 0; auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_seg1); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_seg1); } } } @@ -1192,7 +939,7 @@ try const time_t start_time = std::time(nullptr); - auto temp = std::vector(); + std::vector temp; for (;;) { { @@ -1227,26 +974,11 @@ try { // Read after writing auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - for (auto & iter : block) - { - auto c = iter.column; - for (size_t i = 0; i < c->size(); i++) - { - if (iter.name == DMTestEnv::pk_name) - { - auto expect = temp.at(i + num_rows_read); - EXPECT_EQ(c->getInt(Int64(i)), expect); - } - } - } - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_batches_written * (num_rows_per_write - 2), num_rows_read); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({createColumn(temp)})) + << fmt::format("num_batches_written={} num_rows_per_write={}", num_batches_written, num_rows_per_write); } { @@ -1375,15 +1107,8 @@ try }; auto read_rows = [&](const SegmentPtr & segment) { - size_t rows = 0; auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); - in->readPrefix(); - while (Block block = in->read()) - { - rows += block.rows(); - } - in->readSuffix(); - return rows; + return getInputStreamNRows(in); }; write_100_rows(segment); @@ -1526,27 +1251,13 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *columns_to_read, {RowKeyRange::newAll(false, 1)}); - // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(column_name_i8_to_i32)); - const ColumnWithTypeAndName & col = block.getByName(column_name_i8_to_i32); - ASSERT_DATATYPE_EQ(col.type, column_i32_after_ddl.type); - ASSERT_EQ(col.name, column_i32_after_ddl.name); - ASSERT_EQ(col.column_id, column_i32_after_ddl.id); - for (size_t i = 0; i < block.rows(); ++i) - { - auto value = col.column->getInt(i); - const auto expected = static_cast((i % 2 == 0 ? 
-1 : 1) * i); - ASSERT_EQ(value, expected) << "at row: " << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, column_i32_after_ddl.name}), + createColumns({// + createColumn(createNumbers(0, num_rows_write)), + createColumn(createSignedNumbers(0, num_rows_write))})); } @@ -1574,33 +1285,18 @@ try auto in = segment->getInputStream(dmContext(), *columns_to_read, {RowKeyRange::newAll(false, 1)}); // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(column_name_i8_to_i32)); - const ColumnWithTypeAndName & col = block.getByName(column_name_i8_to_i32); - ASSERT_DATATYPE_EQ(col.type, column_i32_after_ddl.type); - ASSERT_EQ(col.name, column_i32_after_ddl.name); - ASSERT_EQ(col.column_id, column_i32_after_ddl.id); - for (size_t i = 0; i < block.rows(); ++i) - { - auto value = col.column->getInt(i); - auto expected = 0; - if (i < num_rows_write / 2) - expected = static_cast((i % 2 == 0 ? -1 : 1) * i); - else - { - auto r = i - num_rows_write / 2; - expected = static_cast((r % 2 == 0 ? -1 : 1) * r); - } - // std::cerr << " row: " << i << " "<< value << std::endl; - ASSERT_EQ(value, expected) << "at row: " << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, (size_t)(num_rows_write * 2)); + // [0, 50) is the old signed values, [50, 100) is replaced by newer written values + std::vector i32_columndata = createSignedNumbers(0, num_rows_write / 2); + auto tmp = createSignedNumbers(0, num_rows_write * 2 - num_rows_write / 2); + i32_columndata.insert(i32_columndata.end(), tmp.begin(), tmp.end()); + ASSERT_EQ(i32_columndata.size(), 2 * num_rows_write); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, column_i32_after_ddl.name}), + createColumns({// + createColumn(createNumbers(0, 2 * num_rows_write)), + createColumn(i32_columndata)})); } // Flush cache and apply delta-merge, then read again @@ -1615,33 +1311,17 @@ try auto in = segment->getInputStream(dmContext(), *columns_to_read, {RowKeyRange::newAll(false, 1)}); // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(column_name_i8_to_i32)); - const ColumnWithTypeAndName & col = block.getByName(column_name_i8_to_i32); - ASSERT_DATATYPE_EQ(col.type, column_i32_after_ddl.type); - ASSERT_EQ(col.name, column_i32_after_ddl.name); - ASSERT_EQ(col.column_id, column_i32_after_ddl.id); - for (size_t i = 0; i < block.rows(); ++i) - { - auto value = col.column->getInt(i); - auto expected = 0; - if (i < num_rows_write / 2) - expected = static_cast((i % 2 == 0 ? -1 : 1) * i); - else - { - auto r = i - num_rows_write / 2; - expected = static_cast((r % 2 == 0 ? 
-1 : 1) * r); - } - // std::cerr << " row: " << i << " "<< value << std::endl; - ASSERT_EQ(value, expected) << "at row: " << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, (size_t)(num_rows_write * 2)); + std::vector i32_columndata = createSignedNumbers(0, num_rows_write / 2); + auto tmp = createSignedNumbers(0, num_rows_write * 2 - num_rows_write / 2); + i32_columndata.insert(i32_columndata.end(), tmp.begin(), tmp.end()); + ASSERT_EQ(i32_columndata.size(), 2 * num_rows_write); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, column_i32_after_ddl.name}), + createColumns({// + createColumn(createNumbers(0, 2 * num_rows_write)), + createColumn(i32_columndata)})); } } CATCH @@ -1696,23 +1376,12 @@ try auto in = segment->getInputStream(dmContext(), *columns_after_ddl, {RowKeyRange::newAll(false, 1)}); // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - const ColumnWithTypeAndName & col = block.getByName(new_column_define.name); - ASSERT_TRUE(col.type->equals(*new_column_define.type)); - ASSERT_EQ(col.name, new_column_define.name); - ASSERT_EQ(col.column_id, new_column_define.id); - for (size_t i = 0; i < block.rows(); ++i) - { - auto value = col.column->getInt(i); - ASSERT_EQ(value, new_column_default_value_int) << "at row:" << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, new_column_define.name}), + createColumns({// + createColumn(createNumbers(0, num_rows_write)), + createColumn(std::vector(num_rows_write, new_column_default_value_int))})); } @@ -1742,33 +1411,19 @@ try auto in = segment->getInputStream(dmContext(), *columns_after_ddl, {RowKeyRange::newAll(false, 1)}); // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(new_column_name)); - const ColumnWithTypeAndName & col = block.getByName(new_column_name); - ASSERT_DATATYPE_EQ(col.type, new_column_define.type); - ASSERT_EQ(col.name, new_column_define.name); - ASSERT_EQ(col.column_id, new_column_define.id); - for (size_t i = 0; i < block.rows(); ++i) - { - int8_t value = col.column->getInt(i); - int8_t expected = 0; - if (i < num_rows_write / 2) - expected = new_column_default_value_int; - else - { - auto r = i - num_rows_write / 2; - expected = static_cast((r % 2 == 0 ? 
-1 : 1) * r); - } - // std::cerr << " row: " << i << " "<< value << std::endl; - ASSERT_EQ(value, expected) << "at row: " << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, (size_t)(num_rows_write * 2)); + // First 50 values are default value + std::vector i8_columndata(num_rows_write / 2, new_column_default_value_int); + // then fill with signed number sequence + auto tmp = createSignedNumbers(0, num_rows_write * 2 - num_rows_write / 2); + i8_columndata.insert(i8_columndata.end(), tmp.begin(), tmp.end()); + ASSERT_EQ(i8_columndata.size(), 2 * num_rows_write); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, new_column_define.name}), + createColumns({// + createColumn(createNumbers(0, 2 * num_rows_write)), + createColumn(i8_columndata)})); } // Flush cache and apply delta-merge, then read again @@ -1783,33 +1438,19 @@ try auto in = segment->getInputStream(dmContext(), *columns_after_ddl, {RowKeyRange::newAll(false, 1)}); // check that we can read correct values - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - ASSERT_TRUE(block.has(new_column_name)); - const ColumnWithTypeAndName & col = block.getByName(new_column_name); - ASSERT_DATATYPE_EQ(col.type, new_column_define.type); - ASSERT_EQ(col.name, new_column_define.name); - ASSERT_EQ(col.column_id, new_column_define.id); - for (size_t i = 0; i < block.rows(); ++i) - { - int8_t value = col.column->getInt(i); - int8_t expected = 0; - if (i < num_rows_write / 2) - expected = new_column_default_value_int; - else - { - auto r = i - num_rows_write / 2; - expected = static_cast((r % 2 == 0 ? -1 : 1) * r); - } - // std::cerr << " row: " << i << " "<< value << std::endl; - ASSERT_EQ(value, expected) << "at row: " << i; - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, (size_t)(num_rows_write * 2)); + // First 50 values are default value + std::vector i8_columndata(num_rows_write / 2, new_column_default_value_int); + // then fill with signed number sequence + auto tmp = createSignedNumbers(0, num_rows_write * 2 - num_rows_write / 2); + i8_columndata.insert(i8_columndata.end(), tmp.begin(), tmp.end()); + ASSERT_EQ(i8_columndata.size(), 2 * num_rows_write); + + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name, new_column_define.name}), + createColumns({// + createColumn(createNumbers(0, 2 * num_rows_write)), + createColumn(i8_columndata)})); } } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment_common_handle.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment_common_handle.cpp index 6359a3db184..1944a1bf7e1 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment_common_handle.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment_common_handle.cpp @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -28,12 +30,10 @@ namespace DM { namespace tests { -class Segment_Common_Handle_test : public DB::base::TiFlashStorageTestBasic +class SegmentCommonHandleTest : public DB::base::TiFlashStorageTestBasic { public: - Segment_Common_Handle_test() - : storage_pool() - {} + SegmentCommonHandleTest() {} public: static void SetUpTestCase() {} @@ -95,10 +95,10 @@ class Segment_Common_Handle_test : public DB::base::TiFlashStorageTestBasic // the segment we are going to test SegmentPtr segment; bool is_common_handle = true; - size_t rowkey_column_size = 2; + const size_t rowkey_column_size = 2; }; -TEST_F(Segment_Common_Handle_test, WriteRead) 
+TEST_F(SegmentCommonHandleTest, WriteRead) try { const size_t num_rows_write = 100; @@ -115,11 +115,11 @@ try // write to segment segment->write(dmContext(), block); // estimate segment - auto estimatedRows = segment->getEstimatedRows(); - ASSERT_EQ(estimatedRows, block.rows()); + auto estimated_rows = segment->getEstimatedRows(); + ASSERT_EQ(estimated_rows, block.rows()); - auto estimatedBytes = segment->getEstimatedBytes(); - ASSERT_EQ(estimatedBytes, block.bytes()); + auto estimated_bytes = segment->getEstimatedBytes(); + ASSERT_EQ(estimated_bytes, block.bytes()); } { @@ -131,14 +131,7 @@ try { // read written data (only in delta) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -149,14 +142,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } } @@ -180,14 +166,7 @@ try { // read written data (both in delta and stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write + num_rows_write_2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write + num_rows_write_2); } { @@ -198,20 +177,13 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write + num_rows_write_2); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write + num_rows_write_2); } } } CATCH -TEST_F(Segment_Common_Handle_test, WriteReadMultiRange) +TEST_F(SegmentCommonHandleTest, WriteReadMultiRange) try { const size_t num_rows_write = 100; @@ -228,11 +200,11 @@ try // write to segment segment->write(dmContext(), block); // estimate segment - auto estimatedRows = segment->getEstimatedRows(); - ASSERT_EQ(estimatedRows, block.rows()); + auto estimated_rows = segment->getEstimatedRows(); + ASSERT_EQ(estimated_rows, block.rows()); - auto estimatedBytes = segment->getEstimatedBytes(); - ASSERT_EQ(estimatedBytes, block.bytes()); + auto estimated_bytes = segment->getEstimatedBytes(); + ASSERT_EQ(estimated_bytes, block.bytes()); } { @@ -240,23 +212,17 @@ try segment->check(dmContext(), "test"); } - RowKeyRanges read_ranges; - read_ranges.emplace_back(RowKeyRange::fromHandleRange(HandleRange(0, 10), true)); - read_ranges.emplace_back(RowKeyRange::fromHandleRange(HandleRange(20, 30), true)); - read_ranges.emplace_back(RowKeyRange::fromHandleRange(HandleRange(110, 130), true)); + RowKeyRanges read_ranges{ + (RowKeyRange::fromHandleRange(HandleRange(0, 10), true)), + (RowKeyRange::fromHandleRange(HandleRange(20, 30), true)), + 
(RowKeyRange::fromHandleRange(HandleRange(110, 130), true)), + }; const size_t expect_read_rows = 20; { // Round 1 { // read written data (only in delta) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows); } { @@ -269,14 +235,7 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows); } } @@ -302,14 +261,7 @@ try { // read written data (both in delta and stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows_2); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows_2); } { @@ -321,20 +273,13 @@ try { // read written data (only in stable) auto in = segment->getInputStream(dmContext(), *tableColumns(), read_ranges); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expect_read_rows_2); + ASSERT_INPUTSTREAM_NROWS(in, expect_read_rows_2); } } } CATCH -class SegmentDeletion_Common_Handle_test : public Segment_Common_Handle_test +class SegmentDeletion_Common_Handle_test : public SegmentCommonHandleTest , // public testing::WithParamInterface> { @@ -363,14 +308,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -389,21 +327,21 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](1).get(), 99, rowkey_column_size); - } - } - } - in->readSuffix(); + const size_t nrows_after_delete = 2; + // mock common handle + auto common_handle_coldata = [this]() { + auto tmp = std::vector{0, 99}; + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), nrows_after_delete); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH @@ -431,14 +369,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t 
num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -465,21 +396,21 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](1).get(), 99, rowkey_column_size); - } - } - } - in->readSuffix(); + const size_t nrows_after_delete = 2; + // mock common handle + auto common_handle_coldata = [this]() { + auto tmp = std::vector{0, 99}; + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), nrows_after_delete); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH @@ -524,14 +455,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } { @@ -550,21 +474,21 @@ try { // read after delete range auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), 2UL); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](1).get(), 99, rowkey_column_size); - } - } - } - in->readSuffix(); + const size_t nrows_after_delete = 2; + // mock common handle + auto common_handle_coldata = [this]() { + auto tmp = std::vector{0, 99}; + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), nrows_after_delete); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH @@ -573,7 +497,7 @@ INSTANTIATE_TEST_CASE_P(WhetherReadOrMergeDeltaBeforeDeleteRange, SegmentDeletion_Common_Handle_test, testing::Combine(testing::Bool(), testing::Bool())); -TEST_F(Segment_Common_Handle_test, DeleteRead) +TEST_F(SegmentCommonHandleTest, DeleteRead) try { const size_t num_rows_write = 64; @@ -599,6 +523,7 @@ try // Test delete range [70, 100) segment->write(dmContext(), {DMTestEnv::getRowKeyRangeForClusteredIndex(70, 100, rowkey_column_size)}); // flush segment + segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); } @@ -606,29 +531,27 @@ try // Read after deletion // The deleted range has no overlap with current data, so there should be no change 
auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write); - for (auto & iter : block) - { - auto c = iter.column; - for (Int64 i = 0; i < Int64(c->size()); i++) - { - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](i).get(), i, rowkey_column_size); - } - } - } - } - in->readSuffix(); + // mock common handle + auto common_handle_coldata = [this]() { + auto tmp = createNumbers(0, num_rows_write); + Strings res; + std::transform(tmp.begin(), tmp.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), num_rows_write); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } { // Test delete range [63, 70) segment->write(dmContext(), {DMTestEnv::getRowKeyRangeForClusteredIndex(63, 70, rowkey_column_size)}); // flush segment + segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); } @@ -636,49 +559,49 @@ try // Read after deletion // The deleted range has overlap range [63, 64) with current data, so the record with Handle 63 should be deleted auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 1); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](62).get(), 62, rowkey_column_size); - } - EXPECT_EQ(c->size(), 63UL); - } - } - in->readSuffix(); + // mock common handle + auto common_handle_coldata = [this]() { + std::vector int_coldata = createNumbers(0, 63); + Strings res; + std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_EQ(common_handle_coldata.size(), num_rows_write - 1); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } { // Test delete range [1, 32) segment->write(dmContext(), {DMTestEnv::getRowKeyRangeForClusteredIndex(1, 32, rowkey_column_size)}); // flush segment + segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); } { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 32); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](1).get(), 32, rowkey_column_size); - } - } - } - in->readSuffix(); + // mock common handle + auto common_handle_coldata = [this]() { + // the result should be [0, 32,33,34,...62] + std::vector int_coldata{0}; + auto tmp = createNumbers(32, 63); + int_coldata.insert(int_coldata.end(), tmp.begin(), tmp.end()); + Strings res; + 
std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } { @@ -686,27 +609,28 @@ try // delete should be idempotent segment->write(dmContext(), {DMTestEnv::getRowKeyRangeForClusteredIndex(1, 32, rowkey_column_size)}); // flush segment + segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); } { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 32); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 0, rowkey_column_size); - DMTestEnv::verifyClusteredIndexValue(c->operator[](1).get(), 32, rowkey_column_size); - } - } - } - in->readSuffix(); + // mock common handle + auto common_handle_coldata = [this]() { + std::vector int_coldata{0}; + auto tmp = createNumbers(32, 63); + int_coldata.insert(int_coldata.end(), tmp.begin(), tmp.end()); + Strings res; + std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } { @@ -714,31 +638,31 @@ try // There is an overlap range [0, 1) segment->write(dmContext(), {DMTestEnv::getRowKeyRangeForClusteredIndex(0, 2, rowkey_column_size)}); // flush segment + segment->flushCache(dmContext()); segment = segment->mergeDelta(dmContext(), tableColumns()); } { // Read after deletion auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - ASSERT_EQ(block.rows(), num_rows_write - 33); - for (auto & iter : block) - { - auto c = iter.column; - if (iter.name == DMTestEnv::pk_name) - { - DMTestEnv::verifyClusteredIndexValue(c->operator[](0).get(), 32, rowkey_column_size); - } - } - } - in->readSuffix(); + // mock common handle + auto common_handle_coldata = [this]() { + std::vector int_coldata = createNumbers(32, 63); + Strings res; + std::transform(int_coldata.begin(), int_coldata.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } } CATCH -TEST_F(Segment_Common_Handle_test, Split) +TEST_F(SegmentCommonHandleTest, Split) try { const size_t num_rows_write = 100; @@ -759,15 +683,7 @@ try { // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } const auto old_range = segment->getRowKeyRange(); @@ -785,29 +701,9 @@ try EXPECT_EQ(*s2_range.end.value, *old_range.end.value); // TODO 
check segment epoch is increase - size_t num_rows_seg1 = 0; - size_t num_rows_seg2 = 0; - { - { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_seg1 += block.rows(); - } - in->readSuffix(); - } - { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_seg2 += block.rows(); - } - in->readSuffix(); - } - ASSERT_EQ(num_rows_seg1 + num_rows_seg2, num_rows_write); - } + size_t num_rows_seg1 = getInputStreamNRows(segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)})); + size_t num_rows_seg2 = getInputStreamNRows(segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)})); + ASSERT_EQ(num_rows_seg1 + num_rows_seg2, num_rows_write); // merge segments // TODO: enable merge test! @@ -822,21 +718,14 @@ try // TODO check segment epoch is increase } { - size_t num_rows_read = 0; auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_write); + ASSERT_INPUTSTREAM_NROWS(in, num_rows_write); } } } CATCH -TEST_F(Segment_Common_Handle_test, Restore) +TEST_F(SegmentCommonHandleTest, Restore) try { // compare will compares the given segments. @@ -935,7 +824,7 @@ try } CATCH -TEST_F(Segment_Common_Handle_test, MassiveSplit) +TEST_F(SegmentCommonHandleTest, MassiveSplit) try { Settings settings = dmContext().db_context.getSettings(); @@ -991,26 +880,19 @@ try { // Read after writing auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(is_common_handle, rowkey_column_size)}); - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - for (auto & iter : block) - { - auto c = iter.column; - for (size_t i = 0; i < c->size(); i++) - { - if (iter.name == DMTestEnv::pk_name) - { - auto expect = temp.at(i + num_rows_read); - DMTestEnv::verifyClusteredIndexValue(c->operator[](Int64(i)).get(), expect, rowkey_column_size); - } - } - } - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_batches_written * (num_rows_per_write - 2), num_rows_read); + ASSERT_EQ(temp.size(), num_batches_written * (num_rows_per_write - 2)); + // mock common handle + auto common_handle_coldata = [this, &temp]() { + Strings res; + std::transform(temp.begin(), temp.end(), std::back_inserter(res), [this](Int64 v) { return genMockCommonHandle(v, rowkey_column_size); }); + return res; + }(); + ASSERT_INPUTSTREAM_COLS_UR( + in, + Strings({DMTestEnv::pk_name}), + createColumns({ + createColumn(common_handle_coldata), + })); } { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index f929e153847..ac03b509f18 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -57,13 +58,14 @@ namespace tests TEST(StorageDeltaMergeTest, ReadWriteCase1) try { + size_t num_rows_write = 100; // prepare 
block data Block sample; sample.insert(DB::tests::createColumn( - createNumbers(0, 100, /*reversed*/ true), + createNumbers(0, num_rows_write, /*reversed*/ true), "col1")); sample.insert(DB::tests::createColumn( - Strings(100, "a"), + Strings(num_rows_write, "a"), "col2")); Context ctx = DMTestEnv::getContext(); @@ -120,30 +122,10 @@ try BlockInputStreams ins = storage->read(column_names, query_info, ctx, stage2, 8192, 1); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - in->readPrefix(); - - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - for (auto & iter : block) - { - auto c = iter.column; - for (unsigned int i = 0; i < c->size(); i++) - { - if (iter.name == "col1") - { - ASSERT_EQ(c->getInt(i), i); - } - else if (iter.name == "col2") - { - ASSERT_EQ(c->getDataAt(i), "a"); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, sample.rows()); + ASSERT_INPUTSTREAM_BLOCK_UR( + in, + Block({createColumn(createNumbers(0, num_rows_write), "col1"), + createColumn(Strings(num_rows_write, "a"), "col2")})); auto store_status = storage->status(); Block status = store_status->read(); @@ -157,7 +139,7 @@ try } else if (col_name->getDataAt(i) == String("total_rows")) { - EXPECT_EQ(col_value->getDataAt(i), String(DB::toString(num_rows_read))); + EXPECT_EQ(col_value->getDataAt(i), String(DB::toString(num_rows_write))); } } auto delta_store = storage->getStore(); @@ -167,7 +149,7 @@ try { total_segment_rows += stat.rows; } - EXPECT_EQ(total_segment_rows, num_rows_read); + EXPECT_EQ(total_segment_rows, num_rows_write); storage->drop(); // remove the storage from TiFlash context manually storage->removeFromTMTContext(); @@ -609,12 +591,13 @@ TEST(StorageDeltaMergeTest, ReadExtraPhysicalTableID) try { // prepare block data + size_t num_rows_write = 100; Block sample; sample.insert(DB::tests::createColumn( - createNumbers(0, 100, /*reversed*/ true), + createNumbers(0, num_rows_write, /*reversed*/ true), "col1")); sample.insert(DB::tests::createColumn( - Strings(100, "a"), + Strings(num_rows_write, "a"), "col2")); constexpr TiDB::TableID table_id = 1; const String table_name = fmt::format("t_{}", table_id); @@ -676,38 +659,14 @@ try BlockInputStreams ins = storage->read(read_columns, query_info, ctx, stage2, 8192, 1); ASSERT_EQ(ins.size(), 1); BlockInputStreamPtr in = ins[0]; - in->readPrefix(); + ASSERT_INPUTSTREAM_BLOCK_UR( + in, + Block({ + createColumn(createNumbers(0, num_rows_write), "col1"), + createConstColumn>(num_rows_write, table_id, EXTRA_TABLE_ID_COLUMN_NAME), + createColumn(Strings(num_rows_write, "a"), "col2"), + })); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - ASSERT_EQ(block.getByPosition(1).name, EXTRA_TABLE_ID_COLUMN_NAME); - num_rows_read += block.rows(); - for (auto & iter : block) - { - auto c = iter.column; - for (unsigned int i = 0; i < c->size(); i++) - { - if (iter.name == "col1") - { - ASSERT_EQ(c->getInt(i), i); - } - else if (iter.name == "col2") - { - ASSERT_EQ(c->getDataAt(i), "a"); - } - else if (iter.name == EXTRA_TABLE_ID_COLUMN_NAME) - { - Field res; - c->get(i, res); - ASSERT_TRUE(!res.isNull()); - ASSERT_EQ(res.get(), table_id); - } - } - } - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, sample.rows()); storage->drop(); // remove the storage from TiFlash context manually storage->removeFromTMTContext(); @@ -801,15 +760,7 @@ try query_info.mvcc_query_info = std::make_unique(ctx.getSettingsRef().resolve_locks, std::numeric_limits::max()); Names read_columns = {"col1", 
EXTRA_TABLE_ID_COLUMN_NAME, "col2"}; BlockInputStreams ins = storage->read(read_columns, query_info, ctx, stage2, 8192, 1); - BlockInputStreamPtr in = ins[0]; - in->readPrefix(); - size_t num_rows_read = 0; - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - return num_rows_read; + return getInputStreamNRows(ins[0]); }; // create table diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp index c676f2e08d5..a46df50b9a4 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_segment_test_basic.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include namespace DB @@ -63,30 +64,14 @@ size_t SegmentTestBasic::getSegmentRowNumWithoutMVCC(PageId segment_id) { auto segment = segments[segment_id]; auto in = segment->getInputStreamRaw(dmContext(), *tableColumns()); - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - return num_rows_read; + return getInputStreamNRows(in); } size_t SegmentTestBasic::getSegmentRowNum(PageId segment_id) { auto segment = segments[segment_id]; auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - return num_rows_read; + return getInputStreamNRows(in); } void SegmentTestBasic::checkSegmentRow(PageId segment_id, size_t expected_row_num) @@ -94,15 +79,7 @@ void SegmentTestBasic::checkSegmentRow(PageId segment_id, size_t expected_row_nu auto segment = segments[segment_id]; // read written data auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); - - size_t num_rows_read = 0; - in->readPrefix(); - while (Block block = in->read()) - { - num_rows_read += block.rows(); - } - in->readSuffix(); - ASSERT_EQ(num_rows_read, expected_row_num); + ASSERT_INPUTSTREAM_NROWS(in, expected_row_num); } std::optional SegmentTestBasic::splitSegment(PageId segment_id) diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_version_filter.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_version_filter.cpp index 16b1729bea1..d96085942eb 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_version_filter.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_version_filter.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include namespace DB { @@ -77,7 +79,7 @@ BlockInputStreamPtr genInputStream(const BlocksList & blocks, const ColumnDefine } // namespace -TEST(VersionFilter_test, MVCC) +TEST(VersionFilterTest, MVCC) { BlocksList blocks; @@ -93,48 +95,27 @@ TEST(VersionFilter_test, MVCC) { auto in = genInputStream(blocks, columns, 40, false); - in->readPrefix(); - Block block = in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "Flash"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"Flash"})})); } { auto in = genInputStream(blocks, columns, 30, false); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 0UL); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({})})); } { auto in = genInputStream(blocks, columns, 20, false); - in->readPrefix(); - Block block = 
in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "world"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"world"})})); } { auto in = genInputStream(blocks, columns, 10, false); - in->readPrefix(); - Block block = in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "hello"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"hello"})})); } { auto in = genInputStream(blocks, columns, 9, false); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 0UL); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({})})); } } -TEST(VersionFilter_test, MVCCCommonHandle) +TEST(VersionFilterTest, MVCCCommonHandle) { BlocksList blocks; @@ -150,48 +131,27 @@ TEST(VersionFilter_test, MVCCCommonHandle) { auto in = genInputStream(blocks, columns, 40, true); - in->readPrefix(); - Block block = in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "Flash"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"Flash"})})); } { auto in = genInputStream(blocks, columns, 30, true); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 0UL); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({})})); } { auto in = genInputStream(blocks, columns, 20, true); - in->readPrefix(); - Block block = in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "world"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"world"})})); } { auto in = genInputStream(blocks, columns, 10, true); - in->readPrefix(); - Block block = in->read(); - auto col = block.getByName(str_col_name); - auto val = col.column->getDataAt(0); - ASSERT_EQ(val, "hello"); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({"hello"})})); } { auto in = genInputStream(blocks, columns, 9, true); - in->readPrefix(); - Block block = in->read(); - ASSERT_EQ(block.rows(), 0UL); - in->readSuffix(); + ASSERT_INPUTSTREAM_COLS_UR(in, Strings({str_col_name}), createColumns({createColumn({})})); } } -TEST(VersionFilter_test, Compact) +TEST(VersionFilterTest, Compact) { // TODO: currently it just test data statistics, add test for data correctness BlocksList blocks; @@ -294,7 +254,7 @@ TEST(VersionFilter_test, Compact) } } -TEST(VersionFilter_test, CompactCommonHandle) +TEST(VersionFilterTest, CompactCommonHandle) { // TODO: currently it just test data statistics, add test for data correctness BlocksList blocks; diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index 1c8b0242bfa..a7bb3d15f29 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -46,30 +46,42 @@ ::testing::AssertionResult assertEqual( { if (expected_v != actual_v) { - auto expected_str = fmt::format("\n{}: {}", expected_expr, expected_display); - auto actual_str = fmt::format("\n{}: {}", actual_expr, actual_display); + auto expected_str = fmt::format("\n {}:\n {}", expected_expr, expected_display); + auto actual_str = fmt::format("\n {}:\n {}", 
actual_expr, actual_display); return ::testing::AssertionFailure() << title << expected_str << actual_str; } return ::testing::AssertionSuccess(); } -#define ASSERT_EQUAL_WITH_TEXT(expected_value, actual_value, title, expected_display, actual_display) \ - do \ - { \ - auto result = assertEqual(#expected_value, #actual_value, (expected_value), (actual_value), (expected_display), (actual_display), title); \ - if (!result) \ - return result; \ +#define ASSERT_EQUAL_WITH_TEXT(expected_value, actual_value, title, expected_display, actual_display) \ + do \ + { \ + if (auto result = assertEqual(#expected_value, \ + #actual_value, \ + (expected_value), \ + (actual_value), \ + (expected_display), \ + (actual_display), \ + title); \ + !result) \ + return result; \ } while (false) -#define ASSERT_EQUAL(expected_value, actual_value, title) \ - do \ - { \ - auto expected_v = (expected_value); \ - auto actual_v = (actual_value); \ - auto result = assertEqual(#expected_value, #actual_value, expected_v, actual_v, expected_v, actual_v, title); \ - if (!result) \ - return result; \ +#define ASSERT_EQUAL(expected_value, actual_value, title) \ + do \ + { \ + auto expected_v = (expected_value); \ + auto actual_v = (actual_value); \ + if (auto result = assertEqual(#expected_value, \ + #actual_value, \ + expected_v, \ + actual_v, \ + expected_v, \ + actual_v, \ + title); \ + !result) \ + return result; \ } while (false) ::testing::AssertionResult dataTypeEqual( @@ -82,7 +94,8 @@ ::testing::AssertionResult dataTypeEqual( ::testing::AssertionResult columnEqual( const ColumnPtr & expected, - const ColumnPtr & actual) + const ColumnPtr & actual, + bool is_floating_point) { ASSERT_EQUAL(expected->getName(), actual->getName(), "Column name mismatch"); ASSERT_EQUAL(expected->size(), actual->size(), "Column size mismatch"); @@ -92,7 +105,22 @@ ::testing::AssertionResult columnEqual( auto expected_field = (*expected)[i]; auto actual_field = (*actual)[i]; - ASSERT_EQUAL_WITH_TEXT(expected_field, actual_field, fmt::format("Value {} mismatch", i), expected_field.toString(), actual_field.toString()); + if (!is_floating_point) + { + ASSERT_EQUAL_WITH_TEXT(expected_field, actual_field, fmt::format("Value at index {} mismatch", i), expected_field.toString(), actual_field.toString()); + } + else + { + auto expected_field_expr = expected_field.toString(); + auto actual_field_expr = actual_field.toString(); + if (auto res = ::testing::internal::CmpHelperFloatingPointEQ( + expected_field_expr.c_str(), + actual_field_expr.c_str(), + expected_field.safeGet(), + actual_field.safeGet()); + !res) + return ::testing::AssertionFailure() << fmt::format("Value at index {} mismatch, ", i) << res.message(); + } } return ::testing::AssertionSuccess(); } @@ -101,11 +129,10 @@ ::testing::AssertionResult columnEqual( const ColumnWithTypeAndName & expected, const ColumnWithTypeAndName & actual) { - auto ret = dataTypeEqual(expected.type, actual.type); - if (!ret) + if (auto ret = dataTypeEqual(expected.type, actual.type); !ret) return ret; - return columnEqual(expected.column, actual.column); + return columnEqual(expected.column, actual.column, expected.type->isFloatingPoint()); } ::testing::AssertionResult blockEqual( @@ -115,7 +142,10 @@ ::testing::AssertionResult blockEqual( size_t columns = actual.columns(); size_t expected_columns = expected.columns(); - ASSERT_EQUAL(expected_columns, columns, "Block size mismatch"); + ASSERT_EQUAL( + expected_columns, + columns, + fmt::format("Block column size mismatch\nexpected_structure: {}\nstructure: 
{}", expected.dumpJsonStructure(), actual.dumpJsonStructure())); for (size_t i = 0; i < columns; ++i) { @@ -381,7 +411,7 @@ String getColumnsContent(const ColumnsWithTypeAndName & cols) { if (cols.size() <= 0) return ""; - return getColumnsContent(cols, 0, cols[0].column->size() - 1); + return getColumnsContent(cols, 0, cols[0].column->size()); } String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end) @@ -392,7 +422,7 @@ String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size const size_t col_size = cols[0].column->size(); assert(begin <= end); - assert(col_size > end); + assert(col_size >= end); assert(col_size > begin); bool is_same = true; @@ -411,7 +441,7 @@ String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size { /// Push the column name fmt_buf.append(fmt::format("{}: (", cols[i].name)); - for (size_t j = begin; j <= end; ++j) + for (size_t j = begin; j < end; ++j) col_content.push_back(std::make_pair(j, (*cols[i].column)[j].toString())); /// Add content diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index e75acabe56c..50ec3dcda07 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -516,7 +516,7 @@ ColumnWithTypeAndName createConstColumn( String getColumnsContent(const ColumnsWithTypeAndName & cols); -/// We can designate the range of columns printed with begin and end. range: [begin, end] +/// We can designate the range of columns printed with begin and end. range: [begin, end) String getColumnsContent(const ColumnsWithTypeAndName & cols, size_t begin, size_t end); // This wrapper function only serves to construct columns input for function-like macros, @@ -531,7 +531,8 @@ ::testing::AssertionResult dataTypeEqual( ::testing::AssertionResult columnEqual( const ColumnPtr & expected, - const ColumnPtr & actual); + const ColumnPtr & actual, + bool is_floating_point = false); // ignore name ::testing::AssertionResult columnEqual( @@ -581,7 +582,11 @@ DataTypePtr getReturnTypeForFunction( bool raw_function_test = false); template -ColumnWithTypeAndName createNullableColumn(InferredDataVector init_vec, const std::vector & null_map, const String name = "") +ColumnWithTypeAndName createNullableColumn( + InferredDataVector init_vec, + const std::vector & null_map, + const String name = "", + Int64 column_id = 0) { static_assert(TypeTraits::is_nullable == false); auto updated_vec = InferredDataVector>(); @@ -593,15 +598,19 @@ ColumnWithTypeAndName createNullableColumn(InferredDataVector init_vec, const else updated_vec.push_back(init_vec[i]); } - return createColumn>(updated_vec, name); + return createColumn>(updated_vec, name, column_id); } template -ColumnWithTypeAndName createNullableColumn(InferredDataInitializerList init, const std::vector & null_map, const String name = "") +ColumnWithTypeAndName createNullableColumn( + InferredDataInitializerList init, + const std::vector & null_map, + const String name = "", + Int64 column_id = 0) { static_assert(TypeTraits::is_nullable == false); auto vec = InferredDataVector(init); - return createNullableColumn(vec, null_map, name); + return createNullableColumn(vec, null_map, name, column_id); } template diff --git a/dbms/src/TestUtils/InputStreamTestUtils.cpp b/dbms/src/TestUtils/InputStreamTestUtils.cpp new file mode 100644 index 00000000000..761efa8232e --- /dev/null +++ b/dbms/src/TestUtils/InputStreamTestUtils.cpp @@ -0,0 +1,350 @@ +// Copyright 2022 PingCAP, Ltd. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+namespace tests
+{
+size_t getInputStreamNRows(const BlockInputStreamPtr & stream)
+{
+    RUNTIME_CHECK(stream != nullptr, Exception(fmt::format("The input stream is nullptr!")));
+
+    size_t num_rows_read = 0;
+    stream->readPrefix();
+    while (true)
+    {
+        Block block = stream->read();
+        // No more blocks
+        if (!block)
+            break;
+        block.checkNumberOfRows();
+        num_rows_read += block.rows();
+    }
+    stream->readSuffix();
+    return num_rows_read;
+}
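+
+// A minimal usage sketch (mirroring the segment-test call sites updated in this
+// patch; `segment`, `dmContext()` and `tableColumns()` are those test fixtures):
+//   auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()});
+//   size_t num_rows_read = getInputStreamNRows(in);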
+
+::testing::AssertionResult InputStreamRowsLengthCompare(
+    const char * stream_expr,
+    const char * nrows_expr,
+    const BlockInputStreamPtr & stream,
+    const size_t num_rows_expect)
+{
+    RUNTIME_CHECK(stream != nullptr, Exception(fmt::format("The first param of ASSERT_INPUTSTREAM_NROWS, `{}` is nullptr!", stream_expr)));
+
+    size_t num_rows_read = 0;
+    stream->readPrefix();
+    while (true)
+    {
+        try
+        {
+            Block read_block = stream->read();
+            if (!read_block)
+                break;
+            read_block.checkNumberOfRows();
+            num_rows_read += read_block.rows();
+        }
+        catch (...)
+        {
+            return ::testing::AssertionFailure() << fmt::format("exception thrown while reading from {}. Error: {}", stream_expr, getCurrentExceptionMessage(true, false));
+        }
+    }
+    stream->readSuffix();
+
+    if (num_rows_expect == num_rows_read)
+        return ::testing::AssertionSuccess();
+
+    auto reason = fmt::format(R"r(  ({}).read() returns num of rows
+    Which is: {}
+  {}
+    Which is: {})r",
+                              stream_expr,
+                              num_rows_read,
+                              nrows_expr,
+                              num_rows_expect);
+    return ::testing::AssertionFailure() << reason;
+}
+
+::testing::AssertionResult InputStreamVSBlocksCompare(
+    const char * stream_expr,
+    const char * blocks_expr,
+    const BlockInputStreamPtr & stream,
+    const Blocks & blocks)
+{
+    RUNTIME_CHECK(stream != nullptr, Exception(fmt::format("The first param of ASSERT_INPUTSTREAM_BLOCKS, `{}` is nullptr!", stream_expr)));
+
+    size_t block_idx = 0;
+    size_t num_rows_expect = 0;
+    size_t num_rows_read = 0;
+    stream->readPrefix();
+    while (Block read_block = stream->read())
+    {
+        read_block.checkNumberOfRows();
+
+        if (block_idx == blocks.size())
+        {
+            auto reason = fmt::format(R"r(  ({}).read() returns more blocks than expected
+  {} only has {} blocks)r",
+                                      stream_expr,
+                                      blocks_expr,
+                                      blocks.size());
+            return ::testing::AssertionFailure() << reason;
+        }
+
+        blocks[block_idx].checkNumberOfRows(); // check the input
+        if (auto res = DB::tests::blockEqual(blocks[block_idx], read_block); !res)
+        {
+            auto reason = fmt::format(R"r(
+  ({}).read() returns a block that is not equal to
+  the {} block in ({}))r",
+                                      stream_expr,
+                                      block_idx,
+                                      blocks_expr);
+            return res << reason;
+        }
+
+        // continue to compare next block
+        num_rows_read += read_block.rows();
+        num_rows_expect += blocks[block_idx].rows();
+        block_idx++;
+    }
+    stream->readSuffix();
+
+    if (num_rows_expect == num_rows_read)
+        return ::testing::AssertionSuccess();
+
+    auto reason = fmt::format(R"r(  ({}).read() returns num of rows
+    Which is: {}
+  sum( ({}).rows() )
+    Which is: {})r",
+                              stream_expr,
+                              num_rows_read,
+                              blocks_expr,
+                              num_rows_expect);
+    return ::testing::AssertionFailure() << reason;
+}
+
+::testing::AssertionResult InputStreamVSBlockUnrestrictlyCompare(
+    const char * stream_expr,
+    const char * block_expr,
+    const BlockInputStreamPtr & stream,
+    const Block & expect_block)
+{
+    RUNTIME_CHECK(stream != nullptr, Exception(fmt::format("The first param of ASSERT_INPUTSTREAM_BLOCK_UR, `{}` is nullptr!", stream_expr)));
+    expect_block.checkNumberOfRows(); // check the input
+
+    size_t num_rows_expect = expect_block.rows();
+    size_t num_rows_read = 0;
+    size_t prev_num_rows_read = 0;
+    stream->readPrefix();
+    while (Block read_block = stream->read())
+    {
+        read_block.checkNumberOfRows();
+        num_rows_read += read_block.rows();
+        // Hot path: the first block from the input stream carries exactly the expected number of rows
+        if (prev_num_rows_read == 0 && read_block.rows() == num_rows_expect)
+        {
+            if (auto res = DB::tests::blockEqual(expect_block, read_block); !res)
+            {
+                auto reason = fmt::format(R"r(
+  ({}).read() returns a block that is not equal
+    structure() == {}
+  to the expect block ({})
+    structure() == {})r",
+                                          stream_expr,
+                                          read_block.dumpJsonStructure(),
+                                          block_expr,
+                                          expect_block.dumpJsonStructure());
+                return res << reason;
+            }
+        }
+
+        if (num_rows_read > num_rows_expect)
+        {
+            auto reason = fmt::format(R"r(
+  ({}).read() returns more rows({}) than expected
+  ({}).rows()
+    Which is: {}
+  last block is: {})r",
+                                      stream_expr,
+                                      num_rows_read,
+                                      block_expr,
+                                      num_rows_expect,
+                                      getColumnsContent(read_block.getColumnsWithTypeAndName()));
+            return ::testing::AssertionFailure() << reason;
+        }
+
+        // else, compare the `read_block` to the [prev_num_rows_read, num_rows_read) rows of `expect_block`
+        {
+            // num of columns
+            auto read_cols_expr = fmt::format("{}.read().columns()", stream_expr);
+            auto rcols_expr = fmt::format("{}.columns()", block_expr);
+            if (auto res = ::testing::internal::EqHelper::Compare(
+                    read_cols_expr.c_str(),
+                    rcols_expr.c_str(),
+                    read_block.columns(),
+                    expect_block.columns());
+                !res)
+            {
+                return res;
+            }
+            for (size_t i = 0; i < expect_block.columns(); ++i)
+            {
+                const auto & actual_col = read_block.getByPosition(i);
+                const auto & expected_full_col = expect_block.getByPosition(i);
+                if (expected_full_col.column->isColumnConst() != actual_col.column->isColumnConst())
+                {
+                    // One is ColumnConst but the other is not
+                    return ::testing::AssertionFailure() << fmt::format(
+                               "  block[{}].isColumnConst() from actual block\n    {}\n  expect_block[{}].isColumnConst()\n    {}",
+                               actual_col.name,
+                               actual_col.column->isColumnConst(),
+                               expected_full_col.name,
+                               expected_full_col.column->isColumnConst());
+                }
+                else if (expected_full_col.column->isColumnConst() && actual_col.column->isColumnConst())
+                {
+                    if (auto res = dataTypeEqual(expected_full_col.type, actual_col.type); !res)
+                        return res;
+                    if (auto res = ::testing::internal::EqHelper::Compare("", "", actual_col.column->size(), expected_full_col.column->size()); !res)
+                    {
+                        return res;
+                    }
+                    if (actual_col.column->compareAt(0, 0, *expected_full_col.column, -1) != 0)
+                    {
+                        return ::testing::AssertionFailure() << "Column Const data mismatch";
+                    }
+                }
+                else
+                {
+                    auto expect_col = expected_full_col.cloneEmpty();
+                    auto column_data = expect_col.type->createColumn();
+                    column_data->insertRangeFrom(*expected_full_col.column, prev_num_rows_read, num_rows_read - prev_num_rows_read);
+                    expect_col.column = std::move(column_data);
+                    if (auto res = DB::tests::columnEqual(expect_col, actual_col); !res)
+                    {
+                        return res;
+                    }
+                }
+            }
+        }
+
+        prev_num_rows_read += read_block.rows();
+    }
+    stream->readSuffix();
+
+    if (num_rows_expect == num_rows_read)
+        return ::testing::AssertionSuccess();
+
+    // Fewer rows than expected
+    auto reason = fmt::format(R"r(  ({}).read() returns num of rows
+    Which is: {}
+  the num rows of ({})
+    Which is: {})r",
+                              stream_expr,
+                              num_rows_read,
+                              block_expr,
+                              num_rows_expect);
+    return ::testing::AssertionFailure() << reason;
+}
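+
+// A sketch of the unrestricted semantics: the expect block describes the
+// concatenation of everything the stream returns, so a stream yielding
+// {"hello", "world"} and then {"tikv", "tidb"} in two smaller blocks still
+// matches one expect block (column name and values here are illustrative):
+//   ASSERT_INPUTSTREAM_BLOCK_UR(
+//       in,
+//       Block({createColumn<String>({"hello", "world", "tikv", "tidb"}, "col1")}));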
+
+::testing::AssertionResult InputStreamVSBlockUnrestrictlyCompareColumns(
+    const char * stream_expr,
+    const char * /*colnames_expr*/,
+    const char * columns_expr,
+    const BlockInputStreamPtr & stream,
+    const Strings & colnames,
+    const ColumnsWithTypeAndName & columns)
+{
+    RUNTIME_CHECK(stream != nullptr, Exception(fmt::format("The first param of ASSERT_INPUTSTREAM_COLS_UR, `{}` is nullptr!", stream_expr)));
+    RUNTIME_CHECK(
+        colnames.size() == columns.size(),
+        Exception(fmt::format("The length of the second and third param of ASSERT_INPUTSTREAM_COLS_UR do not match! {} != {}", colnames.size(), columns.size())));
+
+    Block expect_block(columns);
+    expect_block.checkNumberOfRows(); // check the input
+
+    size_t num_rows_expect = expect_block.rows();
+    size_t num_rows_read = 0;
+    size_t prev_num_rows_read = 0;
+    stream->readPrefix();
+    while (Block read_block = stream->read())
+    {
+        num_rows_read += read_block.rows();
+
+        if (num_rows_read > num_rows_expect)
+        {
+            auto reason = fmt::format(R"r(
+  ({}).read() returns more rows({}) than expected
+  ({}).rows()
+    Which is: {}
+  last block is: {})r",
+                                      stream_expr,
+                                      num_rows_read,
+                                      columns_expr,
+                                      num_rows_expect,
+                                      getColumnsContent(read_block.getColumnsWithTypeAndName()));
+            return ::testing::AssertionFailure() << reason;
+        }
+
+        // else, compare the `read_block` to the [prev_num_rows_read, num_rows_read) rows of `expect_block`
+        for (size_t col_idx = 0; col_idx < colnames.size(); ++col_idx)
+        {
+            const auto & col_name = colnames[col_idx];
+            // Copy the [prev_num_rows_read, num_rows_read) of `expect_block`
+            const auto & expect_full_col = expect_block.getByPosition(col_idx);
+            auto expect_col = expect_full_col.cloneEmpty();
+            auto column_data = expect_col.type->createColumn();
+            column_data->insertRangeFrom(*expect_full_col.column, prev_num_rows_read, num_rows_read - prev_num_rows_read);
+            expect_col.column = std::move(column_data);
+
+            const auto & actual_col = read_block.getByName(col_name);
+            if (auto res = DB::tests::columnEqual(expect_col, actual_col); !res)
+            {
+                auto expect_expr = fmt::format("expect block: {}", getColumnsContent(expect_block.getColumnsWithTypeAndName(), prev_num_rows_read, num_rows_read));
+                Block actual_block_to_cmp;
+                for (const auto & name : colnames)
+                    actual_block_to_cmp.insert(read_block.getByName(name));
+                auto actual_expr = fmt::format("actual block: {}", getColumnsContent(actual_block_to_cmp.getColumnsWithTypeAndName()));
+                return res << fmt::format("\n  details: [column={}] [prev_nrows={}] [cur_nrows={}]:\n  {}\n  {}", col_name, prev_num_rows_read, num_rows_read, expect_expr, actual_expr);
+            }
+        }
+
+        prev_num_rows_read += read_block.rows();
+    }
+    stream->readSuffix();
+
+    if (num_rows_expect == num_rows_read)
+        return ::testing::AssertionSuccess();
+
+    // Fewer rows than expected
+    auto reason = fmt::format(R"r(  ({}).read() returns num of rows
+    Which is: {}
+  the num rows of ({})
+    Which is: {})r",
+                              stream_expr,
+                              num_rows_read,
+                              columns_expr,
+                              num_rows_expect);
+    return ::testing::AssertionFailure() << reason;
+}
+
+} // namespace tests
+} // namespace DB
diff --git a/dbms/src/TestUtils/InputStreamTestUtils.h b/dbms/src/TestUtils/InputStreamTestUtils.h
new file mode 100644
index 00000000000..82b2768df16
--- /dev/null
+++ b/dbms/src/TestUtils/InputStreamTestUtils.h
@@ -0,0 +1,75 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+namespace DB
+{
+class IBlockInputStream;
+using BlockInputStreamPtr = std::shared_ptr;
+
+namespace tests
+{
+/// Helper functions for comparing the results read from an input stream.
+
+// Get the num of rows read from the input stream
+size_t getInputStreamNRows(const BlockInputStreamPtr & stream);
+
+// Checking the num of rows read from the input stream
+::testing::AssertionResult InputStreamRowsLengthCompare(
+    const char * stream_expr,
+    const char * nrows_expr,
+    const BlockInputStreamPtr & stream,
+    const size_t num_rows_expect);
+#define ASSERT_INPUTSTREAM_NROWS(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::InputStreamRowsLengthCompare, val1, val2)
+
+// Checking the blocks read from the input stream.
+// The input stream must return blocks strictly equal to `blocks`.
+::testing::AssertionResult InputStreamVSBlocksCompare(
+    const char * stream_expr,
+    const char * blocks_expr,
+    const BlockInputStreamPtr & stream,
+    const Blocks & blocks);
+#define ASSERT_INPUTSTREAM_BLOCKS(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::InputStreamVSBlocksCompare, val1, val2)
+
+// Unrestrictedly checking the blocks read from the input stream.
+// Allows the input stream to break the rows into several smaller blocks.
+::testing::AssertionResult InputStreamVSBlockUnrestrictlyCompare(
+    const char * stream_expr,
+    const char * block_expr,
+    const BlockInputStreamPtr & stream,
+    const Block & expect_block);
+#define ASSERT_INPUTSTREAM_BLOCK_UR(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::InputStreamVSBlockUnrestrictlyCompare, val1, val2)
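+
+// Minimal usage sketches for the macros above (the stream `in` and the
+// expected values are illustrative):
+//   ASSERT_INPUTSTREAM_NROWS(in, num_rows_write);
+//   ASSERT_INPUTSTREAM_BLOCK_UR(
+//       in,
+//       Block({createColumn<Int64>(createNumbers<Int64>(0, num_rows_write), "col1")}));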
+// Unrestricted check of a subset of the columns read from the input stream.
+// The input stream is allowed to break the rows into several smaller blocks.
+// Only the read columns whose names are in `colnames` are checked.
+// The size of `colnames` must be the same as `columns`.
+::testing::AssertionResult InputStreamVSBlockUnrestrictlyCompareColumns(
+    const char * stream_expr,
+    const char * colnames_expr,
+    const char * columns_expr,
+    const BlockInputStreamPtr & stream,
+    const Strings & colnames,
+    const ColumnsWithTypeAndName & columns);
+#define ASSERT_INPUTSTREAM_COLS_UR(stream, colnames, columns) \
+    ASSERT_PRED_FORMAT3(::DB::tests::InputStreamVSBlockUnrestrictlyCompareColumns, stream, colnames, columns)
+
+} // namespace tests
+} // namespace DB
diff --git a/dbms/src/TestUtils/tests/gtest_function_test_utils.cpp b/dbms/src/TestUtils/tests/gtest_function_test_utils.cpp
index b7fd8cb7e7f..40b8f41d48d 100644
--- a/dbms/src/TestUtils/tests/gtest_function_test_utils.cpp
+++ b/dbms/src/TestUtils/tests/gtest_function_test_utils.cpp
@@ -18,11 +18,16 @@ namespace DB
 {
 namespace tests
 {
-class TestFunctionTestUtils : public ::testing::Test
+TEST(TestFunctionTestUtils, CompareFloat64Column)
+try
 {
-};
+    ASSERT_COLUMN_EQ(
+        createColumn({1.23456789}),
+        createColumn({1.23456789}));
+}
+CATCH
 
-TEST_F(TestFunctionTestUtils, ParseDecimal)
+TEST(TestFunctionTestUtils, ParseDecimal)
 try
 {
     using DecimalField64 = DecimalField;
@@ -58,7 +63,7 @@
 }
 CATCH
 
-TEST_F(TestFunctionTestUtils, CreateDecimalColumn)
+TEST(TestFunctionTestUtils, CreateDecimalColumn)
 try
 {
     using DecimalField64 = DecimalField;
diff --git a/dbms/src/TestUtils/tests/gtest_inputstream_test_utils.cpp b/dbms/src/TestUtils/tests/gtest_inputstream_test_utils.cpp
new file mode 100644
index 00000000000..64b57c3a5f9
--- /dev/null
+++ b/dbms/src/TestUtils/tests/gtest_inputstream_test_utils.cpp
@@ -0,0 +1,302 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
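+// Unit tests for the input stream assertion helpers declared in TestUtils/InputStreamTestUtils.h.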
+ +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +TEST(InputStreamTestUtilsTest, RawFuncPass) +try +{ + ASSERT_ANY_THROW(InputStreamVSBlockUnrestrictlyCompare("", "", nullptr, Block());); + + { + // pass + BlocksList blocks{ + Block({createColumn({"hello", "world"}, "col1"), + createColumn({123, 456}, "col2")}), + Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({1, 2, 3, 4}, "col2")}), + }; + BlockInputStreamPtr in = std::make_unique(std::move(blocks)); + EXPECT_TRUE( + InputStreamVSBlockUnrestrictlyCompare( + "", + "", + in, + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({123, 456, 1, 2, 3, 4}, "col2"), + }))); + } + { + // pass + BlocksList blocks{ + Block({createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({123, 456, 1, 2, 3, 4}, "col2")}), + }; + BlockInputStreamPtr in = std::make_unique(std::move(blocks)); + EXPECT_TRUE( + InputStreamVSBlockUnrestrictlyCompare( + "", + "", + in, + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({123, 456, 1, 2, 3, 4}, "col2"), + }))); + } +} +CATCH + +TEST(InputStreamTestUtilsTest, RawFuncColumnNotMatch) +try +{ + { + // column num not match + BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"), + createColumn({123, 456}, "col2")}), + Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({1, 2, 3, 4}, "col2")})}; + auto res = InputStreamVSBlockUnrestrictlyCompare( + "in", + "block", + std::make_unique(std::move(blocks)), + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({123, 456, 1, 2, 3, 4}, "col2"), + createColumn({123, 456, 1, 2, 3, 4}, "col3"), + })); + EXPECT_FALSE(res); + } + { + // column num not match + BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"), + createColumn({123, 456}, "col2")}), + Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({1, 2, 3, 4}, "col2")})}; + auto res = InputStreamVSBlockUnrestrictlyCompare( + "in", + "block", + std::make_unique(std::move(blocks)), + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"), + })); + EXPECT_FALSE(res); + } +} +CATCH + +TEST(InputStreamTestUtilsTest, RawFuncRowsNotMatch) +try +{ + { + // rows not match + BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"), + createColumn({123, 456}, "col2")}), + Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({1, 2, 3, 4}, "col2")})}; + auto res = InputStreamVSBlockUnrestrictlyCompare( + "in", + "block", + std::make_unique(std::move(blocks)), + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash", "br"}, "col1"), + createColumn({123, 456, 1, 2, 3, 4, 5}, "col2"), + })); + EXPECT_FALSE(res); + } + { + // rows not match + BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"), + createColumn({123, 456}, "col2")}), + Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), + createColumn({1, 2, 3, 4}, "col2")})}; + auto res = InputStreamVSBlockUnrestrictlyCompare( + "in", + "block", + std::make_unique(std::move(blocks)), + Block({ + createColumn({"hello", "world", "tikv", "tidb", "pd"}, "col1"), + createColumn({123, 456, 1, 2, 3}, "col2"), + })); + EXPECT_FALSE(res); + } +} +CATCH + +TEST(InputStreamTestUtilsTest, RawFuncCellNotMatch) +try +{ + ASSERT_ANY_THROW(InputStreamVSBlockUnrestrictlyCompare("", "", 
nullptr, Block()););
+
+    {
+        BlocksList blocks{
+            Block({createColumn({"hello", "world"}, "col1"),
+                   createColumn({123, 456}, "col2")}),
+            Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                   createColumn({1, 2, 3, 4}, "col2")}),
+        };
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        auto res = InputStreamVSBlockUnrestrictlyCompare(
+            "",
+            "",
+            in,
+            Block({
+                createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"),
+                createColumn({123, 456, 1, 2, 3, /*wrong value*/ 5}, "col2"),
+            }));
+        EXPECT_FALSE(res);
+    }
+    {
+        // wrong value in col1
+        BlocksList blocks{
+            Block({createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"),
+                   createColumn({123, 456, 1, 2, 3, 4}, "col2")}),
+        };
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        auto res = InputStreamVSBlockUnrestrictlyCompare(
+            "",
+            "",
+            in,
+            Block({
+                createColumn({"hello", "world", "tikv", "tidb", "pd", /*wrong value*/ "flash"}, "col1"),
+                createColumn({123, 456, 1, 2, 3, 4}, "col2"),
+            }));
+        EXPECT_FALSE(res);
+    }
+}
+CATCH
+
+TEST(InputStreamTestUtilsTest, CompareInputStreamNRows)
+try
+{
+    // Only check the number of rows
+    {
+        BlocksList blocks{};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_NROWS(in, 0);
+    }
+    {
+        BlocksList blocks{Block({createConstColumn(100, "test_data", "col1"),
+                                 createConstColumn(100, 123, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_NROWS(in, 100);
+    }
+    {
+        BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"),
+                                 createColumn({123, 456}, "col2")}),
+                          Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_NROWS(in, 6);
+    }
+}
+CATCH
+
+TEST(InputStreamTestUtilsTest, CompareInputStreamBlock)
+try
+{
+    // Check the blocks read
+    {
+        BlocksList blocks{};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_BLOCKS(in, Blocks());
+    }
+    {
+        BlocksList blocks{Block({createConstColumn(100, "test_data", "col1"),
+                                 createConstColumn(100, 123, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_BLOCKS(in, Blocks({Block{createConstColumn(100, "test_data", "col1"), createConstColumn(100, 123, "col2")}}));
+    }
+    {
+        BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"),
+                                 createColumn({123, 456}, "col2")}),
+                          Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_BLOCKS(
+            in,
+            Blocks({
+                Block({createColumn({"hello", "world"}, "col1"), createColumn({123, 456}, "col2")}),
+                Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"), createColumn({1, 2, 3, 4}, "col2")}),
+            }));
+    }
+
+    // unrestricted check
+    {
+        // the input stream returns smaller blocks
+        BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"),
+                                 createColumn({123, 456}, "col2")}),
+                          Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_BLOCK_UR(
+            in,
+            Block({//
+                   createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"),
+                   createColumn({123, 456, 1, 2, 3, 4}, "col2")}));
+    }
+    {
+        // the input stream returns exactly the same block
+        BlocksList blocks{Block({createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({123, 456, 1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_BLOCK_UR(
+            in,
+            Block({//
+                   createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"}, "col1"),
+                   createColumn({123, 456, 1, 2, 3, 4}, "col2")}));
+    }
+}
+CATCH
+
+TEST(InputStreamTestUtilsTest, CompareInputStreamColumns)
+try
+{
+    // unrestricted check of a part of the columns
+    {
+        // the input stream returns smaller blocks, only check col1
+        BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"),
+                                 createColumn({123, 456}, "col2")}),
+                          Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_COLS_UR(
+            in,
+            Strings({"col1"}),
+            createColumns({createColumn({"hello", "world", "tikv", "tidb", "pd", "tiflash"})}));
+    }
+    {
+        // the input stream returns smaller blocks, only check col2
+        BlocksList blocks{Block({createColumn({"hello", "world"}, "col1"),
+                                 createColumn({123, 456}, "col2")}),
+                          Block({createColumn({"tikv", "tidb", "pd", "tiflash"}, "col1"),
+                                 createColumn({1, 2, 3, 4}, "col2")})};
+        BlockInputStreamPtr in = std::make_unique(std::move(blocks));
+        ASSERT_INPUTSTREAM_COLS_UR(
+            in,
+            Strings({"col2"}),
+            createColumns({createColumn({123, 456, 1, 2, 3, 4})}));
+    }
+}
+CATCH
+
+} // namespace tests
+} // namespace DB
diff --git a/dbms/src/TestUtils/tests/gtest_print_columns.cpp b/dbms/src/TestUtils/tests/gtest_print_columns.cpp
index 50631fc4f4a..4930d0adefc 100644
--- a/dbms/src/TestUtils/tests/gtest_print_columns.cpp
+++ b/dbms/src/TestUtils/tests/gtest_print_columns.cpp
@@ -19,7 +19,6 @@ namespace DB
 {
 namespace tests
 {
-
 class PrintColumnsTest : public DB::tests::ExecutorTest
 {
 public:
@@ -47,9 +46,9 @@ TEST_F(PrintColumnsTest, SimpleTest)
 try
 {
     EXPECT_EQ(getColumnsContent(test_cols), result1);
-    EXPECT_EQ(getColumnsContent(test_cols, 0, col_len - 1), result2);
-    EXPECT_EQ(getColumnsContent(test_cols, 0, 0), result3);
-    EXPECT_EQ(getColumnsContent(test_cols, 1, col_len - 2), result4);
+    EXPECT_EQ(getColumnsContent(test_cols, 0, col_len), result2);
+    EXPECT_EQ(getColumnsContent(test_cols, 0, 1), result3);
+    EXPECT_EQ(getColumnsContent(test_cols, 1, col_len - 1), result4);
 }
 CATCH

From ccf185474052b91b702e07aabde35ad0d4beabad Mon Sep 17 00:00:00 2001
From: hongyunyan <649330952@qq.com>
Date: Fri, 29 Jul 2022 23:05:12 +0800
Subject: [PATCH 13/17] fix the schema version error in sync schema when latest schema diff is empty (#5506)

close pingcap/tiflash#5397, close pingcap/tiflash#5499
---
 dbms/src/TiDB/Schema/TiDBSchemaSyncer.h | 47 ++++++++++++++-----------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
index a23aeab139f..880828c1259 100644
--- a/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
+++ b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h
@@ -124,9 +124,7 @@ struct TiDBSchemaSyncer : public SchemaSyncer
         if (version_after_load_diff = tryLoadSchemaDiffs(getter, version, context); version_after_load_diff == -1)
         {
             GET_METRIC(tiflash_schema_apply_count, type_full).Increment();
-            loadAllSchema(getter, version, context);
-            // After loadAllSchema, we need update `version_after_load_diff` by last diff value exist or not
-            version_after_load_diff = getter.checkSchemaDiffExists(version) ? version : version - 1;
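+            // Note: loadAllSchema now returns the schema version that is actually applied;
+            // when the latest schema diff does not exist, it syncs at `version - 1`
+            // (see the change to loadAllSchema below), so the checkSchemaDiffExists
+            // probe here is no longer needed.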
+            version_after_load_diff = loadAllSchema(getter, version, context);
         }
         cur_version = version_after_load_diff;
         GET_METRIC(tiflash_schema_version).Set(cur_version);
@@ -167,8 +165,6 @@
 
         LOG_FMT_DEBUG(log, "Try load schema diffs.");
 
-        SchemaBuilder builder(getter, context, databases, latest_version);
-
         Int64 used_version = cur_version;
         // First get all schema diff from `cur_version` to `latest_version`. Only apply the schema diff(s) if we fetch all
         // schema diff without any exception.
@@ -180,6 +176,22 @@
         }
         LOG_FMT_DEBUG(log, "End load schema diffs with total {} entries.", diffs.size());
+
+        if (diffs.empty())
+        {
+            LOG_FMT_WARNING(log, "Schema Diff is empty.");
+            return -1;
+        }
+        // The latest schema diff may be empty, and SchemaBuilder may need to update the latest version for StorageDeltaMerge,
+        // so we must check whether the latest schema diff is empty before calling builder.applyDiff.
+        if (!diffs.back())
+        {
+            --used_version;
+            diffs.pop_back();
+        }
+
+        SchemaBuilder builder(getter, context, databases, used_version);
+
         try
         {
             for (size_t diff_index = 0; diff_index < diffs.size(); ++diff_index)
@@ -188,25 +200,15 @@
                 if (!schema_diff)
                 {
-                    // If `schema diff` from `latest_version` got empty `schema diff`
-                    // Then we won't apply to `latest_version`, but we will apply to `latest_version - 1`
-                    // If `schema diff` from [`cur_version`, `latest_version - 1`] got empty `schema diff`
-                    // Then we should just skip it.
+                    // If we get an empty `schema diff` here (it cannot be the latest one, since that case is handled above), we should just skip it.
                     //
                     // example:
                     // - `cur_version` is 1, `latest_version` is 10
                     // - The schema diff of schema version [2,4,6] is empty, Then we just skip it.
-                    // - The schema diff of schema version 10 is empty, Then we should just apply version into 9
-                    if (diff_index != diffs.size() - 1)
-                    {
-                        LOG_FMT_WARNING(log, "Skip the schema diff from version {}. ", cur_version + diff_index + 1);
-                        continue;
-                    }
-
-                    // if diff_index == diffs.size() - 1, return used_version - 1;
-                    return used_version - 1;
+                    // - The schema diff of schema version 10 is empty; then we just apply up to version 9 (checked above)
+                    LOG_FMT_WARNING(log, "Skip the schema diff from version {}. 
", cur_version + diff_index + 1); + continue; } - builder.applyDiff(*schema_diff); } } @@ -245,10 +247,15 @@ struct TiDBSchemaSyncer : public SchemaSyncer return used_version; } - void loadAllSchema(Getter & getter, Int64 version, Context & context) + Int64 loadAllSchema(Getter & getter, Int64 version, Context & context) { + if (!getter.checkSchemaDiffExists(version)) + { + --version; + } SchemaBuilder builder(getter, context, databases, version); builder.syncAllSchema(); + return version; } }; From a89526c462c83d728636b8ea8fdc96265469e5dd Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 29 Jul 2022 23:53:11 +0800 Subject: [PATCH 14/17] *: Refine the ExchangeReceiverResult (#5494) ref pingcap/tiflash#3994 --- dbms/src/Common/ThreadManager.cpp | 13 ++++--- dbms/src/Common/ThreadManager.h | 4 +- dbms/src/DataStreams/IBlockInputStream.h | 2 +- dbms/src/Flash/Coprocessor/DAGContext.cpp | 38 +++++++++---------- dbms/src/Flash/Coprocessor/DAGContext.h | 4 +- dbms/src/Flash/Coprocessor/DAGDriver.cpp | 3 +- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 6 +-- .../Flash/Coprocessor/InterpreterUtils.cpp | 2 +- dbms/src/Flash/Coprocessor/RemoteRequest.h | 2 +- .../StreamingDAGResponseWriter.cpp | 5 ++- dbms/src/Flash/Mpp/ExchangeReceiver.cpp | 16 ++++---- dbms/src/Flash/Mpp/ExchangeReceiver.h | 24 ++++++++++-- dbms/src/Flash/Mpp/MPPTask.cpp | 5 +-- dbms/src/Flash/Mpp/MinTSOScheduler.cpp | 2 +- 14 files changed, 70 insertions(+), 56 deletions(-) diff --git a/dbms/src/Common/ThreadManager.cpp b/dbms/src/Common/ThreadManager.cpp index b55c36e80f3..32f13ea5108 100644 --- a/dbms/src/Common/ThreadManager.cpp +++ b/dbms/src/Common/ThreadManager.cpp @@ -42,16 +42,17 @@ void waitTasks(std::vector> & futures) std::rethrow_exception(first_exception); } -class DynamicThreadManager : public ThreadManager +class DynamicThreadManager + : public ThreadManager , public ThreadPoolManager { public: - void scheduleThenDetach(bool propagate_memory_tracker, String /*thread_name*/, ThreadManager::Job job) override + void scheduleThenDetach(bool propagate_memory_tracker, std::string /*thread_name*/, ThreadManager::Job job) override { DynamicThreadPool::global_instance->scheduleRaw(propagate_memory_tracker, std::move(job)); } - void schedule(bool propagate_memory_tracker, String /*thread_name*/, ThreadManager::Job job) override + void schedule(bool propagate_memory_tracker, std::string /*thread_name*/, ThreadManager::Job job) override { futures.push_back(DynamicThreadPool::global_instance->schedule(propagate_memory_tracker, std::move(job))); } @@ -73,13 +74,13 @@ class DynamicThreadManager : public ThreadManager class RawThreadManager : public ThreadManager { public: - void schedule(bool propagate_memory_tracker, String thread_name, Job job) override + void schedule(bool propagate_memory_tracker, std::string thread_name, Job job) override { auto t = ThreadFactory::newThread(propagate_memory_tracker, std::move(thread_name), std::move(job)); workers.push_back(std::move(t)); } - void scheduleThenDetach(bool propagate_memory_tracker, String thread_name, Job job) override + void scheduleThenDetach(bool propagate_memory_tracker, std::string thread_name, Job job) override { auto t = ThreadFactory::newThread(propagate_memory_tracker, std::move(thread_name), std::move(job)); t.detach(); @@ -90,7 +91,7 @@ class RawThreadManager : public ThreadManager waitAndClear(); } - ~RawThreadManager() + ~RawThreadManager() override { waitAndClear(); } diff --git a/dbms/src/Common/ThreadManager.h b/dbms/src/Common/ThreadManager.h index 
f4f4f5aa0db..f06c98f9d3b 100644 --- a/dbms/src/Common/ThreadManager.h +++ b/dbms/src/Common/ThreadManager.h @@ -27,8 +27,8 @@ class ThreadManager virtual ~ThreadManager() = default; // only wait non-detached tasks virtual void wait() = 0; - virtual void schedule(bool propagate_memory_tracker, String thread_name, Job job) = 0; - virtual void scheduleThenDetach(bool propagate_memory_tracker, String thread_name, Job job) = 0; + virtual void schedule(bool propagate_memory_tracker, std::string thread_name, Job job) = 0; + virtual void scheduleThenDetach(bool propagate_memory_tracker, std::string thread_name, Job job) = 0; }; std::shared_ptr newThreadManager(); diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 472eac282d4..a5e05f80f97 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -135,7 +135,7 @@ class IBlockInputStream : private boost::noncopyable */ void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } - void setExtraInfo(String info) { extra_info = info; } + void setExtraInfo(String info) { extra_info = std::move(info); } template void forEachChild(F && f) diff --git a/dbms/src/Flash/Coprocessor/DAGContext.cpp b/dbms/src/Flash/Coprocessor/DAGContext.cpp index 1cf7a0d6c87..9fc07489c94 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.cpp +++ b/dbms/src/Flash/Coprocessor/DAGContext.cpp @@ -30,8 +30,6 @@ extern const int DIVIDED_BY_ZERO; extern const int INVALID_TIME; } // namespace ErrorCodes -const String enableFineGrainedShuffleExtraInfo = "enable fine grained shuffle"; - bool strictSqlMode(UInt64 sql_mode) { return sql_mode & TiDBSQLMode::STRICT_ALL_TABLES || sql_mode & TiDBSQLMode::STRICT_TRANS_TABLES; @@ -86,25 +84,25 @@ void DAGContext::initExecutorIdToJoinIdMap() { // only mpp task has join executor // for mpp, all executor has executor id. - if (isMPPTask()) - { - executor_id_to_join_id_map.clear(); - traverseExecutorsReverse(dag_request, [&](const tipb::Executor & executor) { - std::vector all_join_id; - // for mpp, dag_request.has_root_executor() == true, can call `getChildren` directly. - getChildren(executor).forEach([&](const tipb::Executor & child) { - assert(child.has_executor_id()); - auto child_it = executor_id_to_join_id_map.find(child.executor_id()); - if (child_it != executor_id_to_join_id_map.end()) - all_join_id.insert(all_join_id.end(), child_it->second.begin(), child_it->second.end()); - }); - assert(executor.has_executor_id()); - if (executor.tp() == tipb::ExecType::TypeJoin) - all_join_id.push_back(executor.executor_id()); - if (!all_join_id.empty()) - executor_id_to_join_id_map[executor.executor_id()] = all_join_id; + if (!isMPPTask()) + return; + + executor_id_to_join_id_map.clear(); + traverseExecutorsReverse(dag_request, [&](const tipb::Executor & executor) { + std::vector all_join_id; + // for mpp, dag_request.has_root_executor() == true, can call `getChildren` directly. 
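+        // Collect the join ids from every child first, then append this executor's
+        // own id if it is a join.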
+ getChildren(executor).forEach([&](const tipb::Executor & child) { + assert(child.has_executor_id()); + auto child_it = executor_id_to_join_id_map.find(child.executor_id()); + if (child_it != executor_id_to_join_id_map.end()) + all_join_id.insert(all_join_id.end(), child_it->second.begin(), child_it->second.end()); }); - } + assert(executor.has_executor_id()); + if (executor.tp() == tipb::ExecType::TypeJoin) + all_join_id.push_back(executor.executor_id()); + if (!all_join_id.empty()) + executor_id_to_join_id_map[executor.executor_id()] = all_join_id; + }); } std::unordered_map> & DAGContext::getExecutorIdToJoinIdMap() diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index 1e9d2d3628e..4613568ac0d 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -122,7 +122,7 @@ inline bool enableFineGrainedShuffle(uint64_t stream_count) return stream_count > 0; } -extern const String enableFineGrainedShuffleExtraInfo; +static constexpr std::string_view enableFineGrainedShuffleExtraInfo = "enable fine grained shuffle"; /// A context used to track the information that needs to be passed around during DAG planning. class DAGContext @@ -393,7 +393,7 @@ class DAGContext std::vector subqueries; bool is_test = false; /// switch for test, do not use it in production. - std::unordered_map columns_for_test_map; /// , for multiple sources + std::unordered_map columns_for_test_map; /// , for multiple sources }; } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGDriver.cpp b/dbms/src/Flash/Coprocessor/DAGDriver.cpp index 9fe388f8fe4..600c860f252 100644 --- a/dbms/src/Flash/Coprocessor/DAGDriver.cpp +++ b/dbms/src/Flash/Coprocessor/DAGDriver.cpp @@ -145,8 +145,7 @@ try copyData(*streams.in, *dag_output_stream); } - auto throughput = dag_context.getTableScanThroughput(); - if (throughput.first) + if (auto throughput = dag_context.getTableScanThroughput(); throughput.first) GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); if (context.getProcessListElement()) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index b2e344189c3..d07a74b4ac7 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -358,7 +358,7 @@ void DAGQueryBlockInterpreter::executeWindow( /// Window function can be multiple threaded when fine grained shuffle is enabled. 
pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, window_description, log->identifier()); - stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + stream->setExtraInfo(String(enableFineGrainedShuffleExtraInfo)); }); } else @@ -466,7 +466,7 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) size_t stream_count = max_streams; if (enable_fine_grained_shuffle) { - extra_info += ", " + enableFineGrainedShuffleExtraInfo; + extra_info += ", " + String(enableFineGrainedShuffleExtraInfo); stream_count = std::min(max_streams, exchange_receiver->getFineGrainedShuffleStreamCount()); } @@ -743,7 +743,7 @@ void DAGQueryBlockInterpreter::handleExchangeSender(DAGPipeline & pipeline) stream_count, batch_size); stream = std::make_shared(stream, std::move(response_writer), log->identifier()); - stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + stream->setExtraInfo(String(enableFineGrainedShuffleExtraInfo)); }); RUNTIME_CHECK(exchange_sender.tp() == tipb::ExchangeType::Hash, Exception("exchange_sender has to be hash partition when fine grained shuffle is enabled")); RUNTIME_CHECK(stream_count <= 1024, Exception("fine_grained_shuffle_stream_count should not be greater than 1024")); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 034643a6514..3c07071785a 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -147,7 +147,7 @@ void orderStreams( settings.max_bytes_before_external_sort, context.getTemporaryPath(), log->identifier()); - stream->setExtraInfo(enableFineGrainedShuffleExtraInfo); + stream->setExtraInfo(String(enableFineGrainedShuffleExtraInfo)); }); } else diff --git a/dbms/src/Flash/Coprocessor/RemoteRequest.h b/dbms/src/Flash/Coprocessor/RemoteRequest.h index 5af3f66298c..ce78e0b62fe 100644 --- a/dbms/src/Flash/Coprocessor/RemoteRequest.h +++ b/dbms/src/Flash/Coprocessor/RemoteRequest.h @@ -31,7 +31,7 @@ namespace DB { using RegionRetryList = std::list>; -using DAGColumnInfo = std::pair; +using DAGColumnInfo = std::pair; using DAGSchema = std::vector; struct RemoteRequest diff --git a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp index a72dfcc16ef..6e70f280e6f 100644 --- a/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp +++ b/dbms/src/Flash/Coprocessor/StreamingDAGResponseWriter.cpp @@ -271,8 +271,9 @@ void StreamingDAGResponseWriter::h template template -void StreamingDAGResponseWriter::writePackets(const std::vector & responses_row_count, - std::vector & packets) const +void StreamingDAGResponseWriter::writePackets( + const std::vector & responses_row_count, + std::vector & packets) const { for (size_t part_id = 0; part_id < packets.size(); ++part_id) { diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index f5808952740..3538d0238e3 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -617,7 +617,7 @@ void ExchangeReceiverBase::readLoop(const Request & req) break; } } - // if meet error, such as decode packect fails, it will not retry. + // if meet error, such as decode packet fails, it will not retry. 
if (meet_error) { break; @@ -701,7 +701,7 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queue= msg_channels.size())) { LOG_FMT_ERROR(exc_log, "stream_id out of range, stream_id: {}, total_stream_count: {}", stream_id, msg_channels.size()); - return {nullptr, 0, "", true, "stream_id out of range", false}; + return ExchangeReceiverResult::newError(0, "", "stream_id out of range"); } std::shared_ptr recv_msg; if (!msg_channels[stream_id]->pop(recv_msg)) @@ -719,18 +719,18 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queueerror_ptr != nullptr) { - result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, recv_msg->error_ptr->msg(), false}; + result = ExchangeReceiverResult::newError(recv_msg->source_index, recv_msg->req_info, recv_msg->error_ptr->msg()); } else { @@ -739,12 +739,12 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queue(); if (!select_resp->ParseFromString(*(recv_msg->resp_ptr))) { - result = {nullptr, recv_msg->source_index, recv_msg->req_info, true, "decode error", false}; + result = ExchangeReceiverResult::newError(recv_msg->source_index, recv_msg->req_info, "decode error"); } else { - result = {select_resp, recv_msg->source_index, recv_msg->req_info, false, "", false}; /// If mocking TiFlash as TiDB, here should decode chunks from select_resp. + result = ExchangeReceiverResult::newOk(select_resp, recv_msg->source_index, recv_msg->req_info); if (!select_resp->chunks().empty()) { assert(recv_msg->chunks.empty()); @@ -757,7 +757,7 @@ ExchangeReceiverResult ExchangeReceiverBase::nextResult(std::queuesource_index, recv_msg->req_info, false, "", false}; + result = ExchangeReceiverResult::newOk(nullptr, recv_msg->source_index, recv_msg->req_info); } if (!result.meet_error && !recv_msg->chunks.empty()) { diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.h b/dbms/src/Flash/Mpp/ExchangeReceiver.h index 708f133f226..9213eb76e60 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.h +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.h @@ -69,6 +69,26 @@ struct ExchangeReceiverResult bool eof; DecodeDetail decode_detail; + ExchangeReceiverResult() + : ExchangeReceiverResult(nullptr, 0) + {} + + static ExchangeReceiverResult newOk(std::shared_ptr resp_, size_t call_index_, const String & req_info_) + { + return {resp_, call_index_, req_info_, /*meet_error*/ false, /*error_msg*/ "", /*eof*/ false}; + } + + static ExchangeReceiverResult newEOF(const String & req_info_) + { + return {/*resp*/ nullptr, 0, req_info_, /*meet_error*/ false, /*error_msg*/ "", /*eof*/ true}; + } + + static ExchangeReceiverResult newError(size_t call_index, const String & req_info, const String & error_msg) + { + return {/*resp*/ nullptr, call_index, req_info, /*meet_error*/ true, error_msg, /*eof*/ false}; + } + +private: ExchangeReceiverResult( std::shared_ptr resp_, size_t call_index_, @@ -83,10 +103,6 @@ struct ExchangeReceiverResult , error_msg(error_msg_) , eof(eof_) {} - - ExchangeReceiverResult() - : ExchangeReceiverResult(nullptr, 0) - {} }; enum class ExchangeReceiverState diff --git a/dbms/src/Flash/Mpp/MPPTask.cpp b/dbms/src/Flash/Mpp/MPPTask.cpp index def9bd70efd..fc333c7e1fa 100644 --- a/dbms/src/Flash/Mpp/MPPTask.cpp +++ b/dbms/src/Flash/Mpp/MPPTask.cpp @@ -347,7 +347,7 @@ void MPPTask::runImpl() LOG_FMT_INFO(log, "task starts preprocessing"); preprocess(); needed_threads = estimateCountOfNewThreads(); - LOG_FMT_DEBUG(log, "Estimate new thread count of query :{} including tunnel_threads: {} , receiver_threads: {}", needed_threads, 
dag_context->tunnel_set->getRemoteTunnelCnt(), new_thread_count_of_exchange_receiver); + LOG_FMT_DEBUG(log, "Estimate new thread count of query: {} including tunnel_threads: {}, receiver_threads: {}", needed_threads, dag_context->tunnel_set->getRemoteTunnelCnt(), new_thread_count_of_exchange_receiver); scheduleOrWait(); @@ -393,8 +393,7 @@ void MPPTask::runImpl() if (status == FINISHED) { // todo when error happens, should try to update the metrics if it is available - auto throughput = dag_context->getTableScanThroughput(); - if (throughput.first) + if (auto throughput = dag_context->getTableScanThroughput(); throughput.first) GET_METRIC(tiflash_storage_logical_throughput_bytes).Observe(throughput.second); auto process_info = context->getProcessListElement()->getInfo(); auto peak_memory = process_info.peak_memory_usage > 0 ? process_info.peak_memory_usage : 0; diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp index 967bfcecfa3..0ee05c9dae0 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp @@ -194,7 +194,7 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q GET_METRIC(tiflash_task_scheduler, type_active_queries_count).Set(active_set.size()); GET_METRIC(tiflash_task_scheduler, type_estimated_thread_usage).Set(estimated_thread_usage); GET_METRIC(tiflash_task_scheduler, type_active_tasks_count).Increment(); - LOG_FMT_INFO(log, "{} is scheduled (active set size = {}) due to available threads {}, after applied for {} threads, used {} of the thread {} limit {}.", task->getId().toString(), active_set.size(), isWaiting ? " from the waiting set" : " directly", needed_threads, estimated_thread_usage, min_tso == tso ? "hard" : "soft", min_tso == tso ? thread_hard_limit : thread_soft_limit); + LOG_FMT_INFO(log, "{} is scheduled (active set size = {}) due to available threads {}, after applied for {} threads, used {} of the thread {} limit {}.", task->getId().toString(), active_set.size(), isWaiting ? "from the waiting set" : "directly", needed_threads, estimated_thread_usage, min_tso == tso ? "hard" : "soft", min_tso == tso ? thread_hard_limit : thread_soft_limit); return true; } else From eef9e22ecdfd6cb8a34c353eaf5ad1dba8406981 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Sun, 31 Jul 2022 18:30:04 +0800 Subject: [PATCH 15/17] Add benchmark for collation (#5491) close pingcap/tiflash#5500 --- dbms/src/Functions/tests/bench_collation.cpp | 150 +++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 dbms/src/Functions/tests/bench_collation.cpp diff --git a/dbms/src/Functions/tests/bench_collation.cpp b/dbms/src/Functions/tests/bench_collation.cpp new file mode 100644 index 00000000000..a9054119dd2 --- /dev/null +++ b/dbms/src/Functions/tests/bench_collation.cpp @@ -0,0 +1,150 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
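+// Microbenchmarks for the TiDB collators: they time less-than, equality and LIKE
+// comparisons over one-million-row string columns, one benchmark per collation.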
+ +#include +#include +#include +#include + +/// this is a hack, include the cpp file so we can test MatchImpl directly +#include // NOLINT + +namespace DB +{ +namespace tests +{ + +class CollationBench : public benchmark::Fixture +{ +public: + using ColStringType = typename TypeTraits::FieldType; + using ColUInt8Type = typename TypeTraits::FieldType; + + ColumnsWithTypeAndName data{toVec("col0", std::vector(1000000, "aaaaaaaaaaaaa")), + toVec("col1", std::vector(1000000, "aaaaaaaaaaaaa")), + toVec("result", std::vector{})}; + + ColumnsWithTypeAndName like_data{toVec("col0", std::vector(1000000, "qwdgefwabchfue")), + createConstColumn(1000000, "%abc%"), + createConstColumn(1000000, static_cast('\\')), + toVec("result", std::vector{})}; +}; + +class CollationLessBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +class CollationEqBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +class CollationLikeBench : public CollationBench +{ +public: + void SetUp(const benchmark::State &) override {} +}; + +#define BENCH_LESS_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationLessBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionLess fl; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fl.setCollator(collator); \ + Block block(data); \ + ColumnNumbers arguments{0, 1}; \ + for (auto _ : state) \ + { \ + fl.executeImpl(block, arguments, 2); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationLessBench, collator)->Iterations(10); + + +#define BENCH_EQ_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationEqBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionEquals fe; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fe.setCollator(collator); \ + Block block(data); \ + ColumnNumbers arguments{0, 1}; \ + for (auto _ : state) \ + { \ + fe.executeImpl(block, arguments, 2); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationEqBench, collator)->Iterations(10); + + +#define BENCH_LIKE_COLLATOR(collator) \ + BENCHMARK_DEFINE_F(CollationLikeBench, collator) \ + (benchmark::State & state) \ + try \ + { \ + FunctionLike3Args fl; \ + TiDB::TiDBCollatorPtr collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::collator); \ + fl.setCollator(collator); \ + Block block(like_data); \ + ColumnNumbers arguments{0, 1, 2}; \ + for (auto _ : state) \ + { \ + fl.executeImpl(block, arguments, 3); \ + } \ + } \ + CATCH \ + BENCHMARK_REGISTER_F(CollationLikeBench, collator)->Iterations(10); + + +BENCH_LESS_COLLATOR(UTF8MB4_BIN); +BENCH_LESS_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_LESS_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_LESS_COLLATOR(UTF8_BIN); +BENCH_LESS_COLLATOR(UTF8_GENERAL_CI); +BENCH_LESS_COLLATOR(UTF8_UNICODE_CI); +BENCH_LESS_COLLATOR(ASCII_BIN); +BENCH_LESS_COLLATOR(BINARY); +BENCH_LESS_COLLATOR(LATIN1_BIN); + +BENCH_EQ_COLLATOR(UTF8MB4_BIN); +BENCH_EQ_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_EQ_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_EQ_COLLATOR(UTF8_BIN); +BENCH_EQ_COLLATOR(UTF8_GENERAL_CI); +BENCH_EQ_COLLATOR(UTF8_UNICODE_CI); +BENCH_EQ_COLLATOR(ASCII_BIN); +BENCH_EQ_COLLATOR(BINARY); +BENCH_EQ_COLLATOR(LATIN1_BIN); + +BENCH_LIKE_COLLATOR(UTF8MB4_BIN); +BENCH_LIKE_COLLATOR(UTF8MB4_GENERAL_CI); +BENCH_LIKE_COLLATOR(UTF8MB4_UNICODE_CI); +BENCH_LIKE_COLLATOR(UTF8_BIN); +BENCH_LIKE_COLLATOR(UTF8_GENERAL_CI); +BENCH_LIKE_COLLATOR(UTF8_UNICODE_CI); 
+BENCH_LIKE_COLLATOR(ASCII_BIN); +BENCH_LIKE_COLLATOR(BINARY); +BENCH_LIKE_COLLATOR(LATIN1_BIN); + +} // namespace tests +} // namespace DB From bebd45a85eed13cffc55275ffbd3f98f9a438f8e Mon Sep 17 00:00:00 2001 From: Fu Zhe Date: Tue, 2 Aug 2022 11:26:05 +0800 Subject: [PATCH 16/17] *: Combine LOG_XXX and LOG_FMT_XXX (#5512) ref pingcap/tiflash#5511 --- libs/libcommon/include/common/MacroUtils.h | 45 +++++++ libs/libcommon/include/common/logger_useful.h | 113 ++++++++---------- 2 files changed, 95 insertions(+), 63 deletions(-) create mode 100644 libs/libcommon/include/common/MacroUtils.h diff --git a/libs/libcommon/include/common/MacroUtils.h b/libs/libcommon/include/common/MacroUtils.h new file mode 100644 index 00000000000..e7466f62536 --- /dev/null +++ b/libs/libcommon/include/common/MacroUtils.h @@ -0,0 +1,45 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#define TF_GET_1ST_ARG(a, ...) a +#define TF_GET_2ND_ARG(a1, a2, ...) a2 +#define TF_GET_3RD_ARG(a1, a2, a3, ...) a3 +#define TF_GET_4TH_ARG(a1, a2, a3, a4, ...) a4 +#define TF_GET_5TH_ARG(a1, a2, a3, a4, a5, ...) a5 +#define TF_GET_6TH_ARG(a1, a2, a3, a4, a5, a6, ...) a6 +#define TF_GET_7TH_ARG(a1, a2, a3, a4, a5, a6, a7, ...) a7 +#define TF_GET_8TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, ...) a8 +#define TF_GET_9TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, ...) a9 +#define TF_GET_10TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, ...) a10 +#define TF_GET_11TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, ...) a11 +#define TF_GET_12TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, ...) a12 +#define TF_GET_13TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, ...) a13 +#define TF_GET_14TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, ...) a14 +#define TF_GET_15TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, ...) a15 +#define TF_GET_16TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, ...) a16 +#define TF_GET_17TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, ...) a17 +#define TF_GET_18TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, ...) a18 +#define TF_GET_19TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, ...) a19 +#define TF_GET_20TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, ...) a20 +#define TF_GET_21TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, ...) a21 +#define TF_GET_22TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, ...) a22 +#define TF_GET_23TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, ...) 
a23 +#define TF_GET_24TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, ...) a24 +#define TF_GET_25TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, ...) a25 +#define TF_GET_26TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, ...) a26 +#define TF_GET_27TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, ...) a27 +#define TF_GET_28TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, ...) a28 +#define TF_GET_29TH_ARG(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, ...) a29 diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index e3981baf34c..44e7d45fca1 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -17,6 +17,7 @@ /// Macros for convenient usage of Poco logger. #include +#include #include #include @@ -26,17 +27,6 @@ namespace LogFmtDetails { -template -inline constexpr size_t numArgs(Ts &&...) -{ - return sizeof...(Ts); -} -template -inline constexpr auto firstArg(T && x, Ts &&...) -{ - return std::forward(x); -} - // https://stackoverflow.com/questions/8487986/file-macro-shows-full-path/54335644#54335644 template inline constexpr size_t getFileNameOffset(const T (&str)[S], size_t i = S - 1) @@ -50,8 +40,8 @@ inline constexpr size_t getFileNameOffset(T (&/*str*/)[1]) return 0; } -template -std::string toCheckedFmtStr(const S & format, const Ignored &, Args &&... args) +template +std::string toCheckedFmtStr(const S & format, Args &&... args) { // The second arg is the same as `format`, just ignore // Apply `make_args_checked` for checks `format` validity at compile time. @@ -60,61 +50,58 @@ std::string toCheckedFmtStr(const S & format, const Ignored &, Args &&... args) } } // namespace LogFmtDetails -/// Logs a message to a specified logger with that level. - -#define LOG_IMPL(logger, PRIORITY, message) \ - do \ - { \ - if ((logger)->is((PRIORITY))) \ - { \ - Poco::Message poco_message( \ - /*source*/ (logger)->name(), \ - /*text*/ message, \ - /*prio*/ (PRIORITY), \ - /*file*/ &__FILE__[LogFmtDetails::getFileNameOffset(__FILE__)], \ - /*line*/ __LINE__); \ - (logger)->log(poco_message); \ - } \ - } while (false) - -#define LOG_TRACE(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_TRACE, message) -#define LOG_DEBUG(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_DEBUG, message) -#define LOG_INFO(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_INFORMATION, message) -#define LOG_WARNING(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_WARNING, message) -#define LOG_ERROR(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_ERROR, message) -#define LOG_FATAL(logger, message) LOG_IMPL(logger, Poco::Message::PRIO_FATAL, message) - - /// Logs a message to a specified logger with that level. /// If more than one argument is provided, /// the first argument is interpreted as template with {}-substitutions /// and the latter arguments treat as values to substitute. /// If only one argument is provided, it is threat as message without substitutions. -#define LOG_GET_FIRST_ARG(arg, ...) 
arg -#define LOG_FMT_IMPL(logger, PRIORITY, ...) \ - do \ - { \ - if ((logger)->is((PRIORITY))) \ - { \ - std::string formatted_message = LogFmtDetails::numArgs(__VA_ARGS__) > 1 \ - ? LogFmtDetails::toCheckedFmtStr( \ - FMT_STRING(LOG_GET_FIRST_ARG(__VA_ARGS__)), \ - __VA_ARGS__) \ - : LogFmtDetails::firstArg(__VA_ARGS__); \ - Poco::Message poco_message( \ - /*source*/ (logger)->name(), \ - /*text*/ formatted_message, \ - /*prio*/ (PRIORITY), \ - /*file*/ &__FILE__[LogFmtDetails::getFileNameOffset(__FILE__)], \ - /*line*/ __LINE__); \ - (logger)->log(poco_message); \ - } \ +#define LOG_INTERNAL(logger, PRIORITY, message) \ + do \ + { \ + Poco::Message poco_message( \ + /*source*/ (logger)->name(), \ + /*text*/ (message), \ + /*prio*/ (PRIORITY), \ + /*file*/ &__FILE__[LogFmtDetails::getFileNameOffset(__FILE__)], \ + /*line*/ __LINE__); \ + (logger)->log(poco_message); \ } while (false) -#define LOG_FMT_TRACE(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_TRACE, __VA_ARGS__) -#define LOG_FMT_DEBUG(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_DEBUG, __VA_ARGS__) -#define LOG_FMT_INFO(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_INFORMATION, __VA_ARGS__) -#define LOG_FMT_WARNING(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_WARNING, __VA_ARGS__) -#define LOG_FMT_ERROR(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_ERROR, __VA_ARGS__) -#define LOG_FMT_FATAL(logger, ...) LOG_FMT_IMPL(logger, Poco::Message::PRIO_FATAL, __VA_ARGS__) + +#define LOG_IMPL_0(logger, PRIORITY, message) \ + do \ + { \ + if ((logger)->is((PRIORITY))) \ + LOG_INTERNAL(logger, PRIORITY, message); \ + } while (false) + +#define LOG_IMPL_1(logger, PRIORITY, fmt_str, ...) \ + do \ + { \ + if ((logger)->is((PRIORITY))) \ + { \ + auto _message = LogFmtDetails::toCheckedFmtStr(FMT_STRING(fmt_str), __VA_ARGS__); \ + LOG_INTERNAL(logger, PRIORITY, _message); \ + } \ + } while (false) + +#define LOG_IMPL_CHOSER(...) TF_GET_29TH_ARG(__VA_ARGS__, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_1, LOG_IMPL_0) + +// clang-format off +#define LOG_IMPL(logger, PRIORITY, ...) LOG_IMPL_CHOSER(__VA_ARGS__)(logger, PRIORITY, __VA_ARGS__) +// clang-format on + +#define LOG_TRACE(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_TRACE, __VA_ARGS__) +#define LOG_DEBUG(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_DEBUG, __VA_ARGS__) +#define LOG_INFO(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_INFORMATION, __VA_ARGS__) +#define LOG_WARNING(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_WARNING, __VA_ARGS__) +#define LOG_ERROR(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_ERROR, __VA_ARGS__) +#define LOG_FATAL(logger, ...) LOG_IMPL(logger, Poco::Message::PRIO_FATAL, __VA_ARGS__) + +#define LOG_FMT_TRACE(...) LOG_TRACE(__VA_ARGS__) +#define LOG_FMT_DEBUG(...) LOG_DEBUG(__VA_ARGS__) +#define LOG_FMT_INFO(...) LOG_INFO(__VA_ARGS__) +#define LOG_FMT_WARNING(...) LOG_WARNING(__VA_ARGS__) +#define LOG_FMT_ERROR(...) LOG_ERROR(__VA_ARGS__) +#define LOG_FMT_FATAL(...) 
LOG_FATAL(__VA_ARGS__) From 4972cf3faa4c53cf0341ce6c3ea1f3e8750b9e8b Mon Sep 17 00:00:00 2001 From: yanweiqi <592838129@qq.com> Date: Tue, 2 Aug 2022 12:08:05 +0800 Subject: [PATCH 17/17] *: decouple FlashGrpcServerHolder from Server.cpp (#5516) ref pingcap/tiflash#4609 --- dbms/src/Flash/DiagnosticsService.cpp | 12 +- dbms/src/Flash/DiagnosticsService.h | 8 +- dbms/src/Server/CMakeLists.txt | 1 + dbms/src/Server/FlashGrpcServerHolder.cpp | 198 ++++++++++++++++++++++ dbms/src/Server/FlashGrpcServerHolder.h | 48 ++++++ dbms/src/Server/Server.cpp | 194 +-------------------- dbms/src/Server/Server.h | 2 +- 7 files changed, 260 insertions(+), 203 deletions(-) create mode 100644 dbms/src/Server/FlashGrpcServerHolder.cpp create mode 100644 dbms/src/Server/FlashGrpcServerHolder.h diff --git a/dbms/src/Flash/DiagnosticsService.cpp b/dbms/src/Flash/DiagnosticsService.cpp index 937f2794fa8..11de7687e46 100644 --- a/dbms/src/Flash/DiagnosticsService.cpp +++ b/dbms/src/Flash/DiagnosticsService.cpp @@ -38,7 +38,7 @@ ::grpc::Status DiagnosticsService::server_info( ::diagnosticspb::ServerInfoResponse * response) try { - const TiFlashRaftProxyHelper * helper = server.context().getTMTContext().getKVStore()->getProxyHelper(); + const TiFlashRaftProxyHelper * helper = context.getTMTContext().getKVStore()->getProxyHelper(); if (helper) { std::string req = request->SerializeAsString(); @@ -63,18 +63,18 @@ catch (const std::exception & e) } // get & filter(ts of last record < start-time) all files in same log directory. -std::list getFilesToSearch(IServer & server, Poco::Logger * log, const int64_t start_time) +std::list getFilesToSearch(Poco::Util::LayeredConfiguration & config, Poco::Logger * log, const int64_t start_time) { std::list files_to_search; std::string log_dir; // log directory - auto error_log_file_prefix = server.config().getString("logger.errorlog", "*"); - auto tracing_log_file_prefix = server.config().getString("logger.tracing_log", "*"); + auto error_log_file_prefix = config.getString("logger.errorlog", "*"); + auto tracing_log_file_prefix = config.getString("logger.tracing_log", "*"); // ignore tiflash error log and mpp task tracing log std::vector ignore_log_file_prefixes = {error_log_file_prefix, tracing_log_file_prefix}; { - auto log_file_prefix = server.config().getString("logger.log"); + auto log_file_prefix = config.getString("logger.log"); if (auto it = log_file_prefix.rfind('/'); it != std::string::npos) { log_dir = std::string(log_file_prefix.begin(), log_file_prefix.begin() + it); @@ -163,7 +163,7 @@ ::grpc::Status DiagnosticsService::search_log( LOG_FMT_DEBUG(log, "Handling SearchLog done: {}", request->DebugString()); }); - auto files_to_search = getFilesToSearch(server, log, start_time); + auto files_to_search = getFilesToSearch(config, log, start_time); for (const auto & path : files_to_search) { diff --git a/dbms/src/Flash/DiagnosticsService.h b/dbms/src/Flash/DiagnosticsService.h index a48e1e51a0c..1bba7d63c53 100644 --- a/dbms/src/Flash/DiagnosticsService.h +++ b/dbms/src/Flash/DiagnosticsService.h @@ -32,9 +32,10 @@ class DiagnosticsService final : public ::diagnosticspb::Diagnostics::Service , private boost::noncopyable { public: - explicit DiagnosticsService(IServer & _server) + explicit DiagnosticsService(Context & context_, Poco::Util::LayeredConfiguration & config_) : log(&Poco::Logger::get("DiagnosticsService")) - , server(_server) + , context(context_) + , config(config_) {} ~DiagnosticsService() override = default; @@ -51,8 +52,9 @@ class DiagnosticsService final : 
public ::diagnosticspb::Diagnostics::Service
 private:
     Poco::Logger * log;
+    Context & context;
-    IServer & server;
+    Poco::Util::LayeredConfiguration & config;
 };
 } // namespace DB
diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt
index 2948bb076db..77ab5e69838 100644
--- a/dbms/src/Server/CMakeLists.txt
+++ b/dbms/src/Server/CMakeLists.txt
@@ -29,6 +29,7 @@ configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h)
 
 add_library (clickhouse-server-lib
     HTTPHandler.cpp
+    FlashGrpcServerHolder.cpp
     MetricsTransmitter.cpp
     MetricsPrometheus.cpp
     NotFoundHandler.cpp
diff --git a/dbms/src/Server/FlashGrpcServerHolder.cpp b/dbms/src/Server/FlashGrpcServerHolder.cpp
new file mode 100644
index 00000000000..c82f79976e8
--- /dev/null
+++ b/dbms/src/Server/FlashGrpcServerHolder.cpp
@@ -0,0 +1,198 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include
+namespace DB
+{
+namespace ErrorCodes
+{
+extern const int IP_ADDRESS_NOT_ALLOWED;
+} // namespace ErrorCodes
+namespace
+{
+void handleRpcs(grpc::ServerCompletionQueue * curcq, const LoggerPtr & log)
+{
+    GET_METRIC(tiflash_thread_count, type_total_rpc_async_worker).Increment();
+    SCOPE_EXIT({
+        GET_METRIC(tiflash_thread_count, type_total_rpc_async_worker).Decrement();
+    });
+    void * tag = nullptr; // uniquely identifies a request.
+    bool ok = false;
+    while (true)
+    {
+        String err_msg;
+        try
+        {
+            // Block waiting to read the next event from the completion queue. The
+            // event is uniquely identified by its tag, which in this case is the
+            // memory address of a EstablishCallData instance.
+            // The return value of Next should always be checked. This return value
+            // tells us whether there is any kind of event or cq is shutting down.
+            if (!curcq->Next(&tag, &ok))
+            {
+                LOG_FMT_INFO(log, "CQ is fully drained and shut down");
+                break;
+            }
+            GET_METRIC(tiflash_thread_count, type_active_rpc_async_worker).Increment();
+            SCOPE_EXIT({
+                GET_METRIC(tiflash_thread_count, type_active_rpc_async_worker).Decrement();
+            });
+            // If ok is false, it means the server is shutting down.
+            // We do not log every not-ok event, since the volume is large and would pollute the log.
+            if (ok)
+                static_cast(tag)->proceed();
+            else
+                static_cast(tag)->cancel();
+        }
+        catch (Exception & e)
+        {
+            err_msg = e.displayText();
+            LOG_FMT_ERROR(log, "handleRpcs meets error: {} Stack Trace : {}", err_msg, e.getStackTrace().toString());
+        }
+        catch (pingcap::Exception & e)
+        {
+            err_msg = e.message();
+            LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg);
+        }
+        catch (std::exception & e)
+        {
+            err_msg = e.what();
+            LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg);
+        }
+        catch (...)
+ { + err_msg = "unrecovered error"; + LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg); + throw; + } + } +} +} // namespace + +FlashGrpcServerHolder::FlashGrpcServerHolder(Context & context, Poco::Util::LayeredConfiguration & config_, TiFlashSecurityConfig & security_config, const TiFlashRaftConfig & raft_config, const LoggerPtr & log_) + : log(log_) + , is_shutdown(std::make_shared>(false)) +{ + grpc::ServerBuilder builder; + if (security_config.has_tls_config) + { + grpc::SslServerCredentialsOptions server_cred(GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_AND_VERIFY); + auto options = security_config.readAndCacheSecurityInfo(); + server_cred.pem_root_certs = options.pem_root_certs; + server_cred.pem_key_cert_pairs.push_back( + grpc::SslServerCredentialsOptions::PemKeyCertPair{options.pem_private_key, options.pem_cert_chain}); + builder.AddListeningPort(raft_config.flash_server_addr, grpc::SslServerCredentials(server_cred)); + } + else + { + builder.AddListeningPort(raft_config.flash_server_addr, grpc::InsecureServerCredentials()); + } + + /// Init and register flash service. + bool enable_async_server = context.getSettingsRef().enable_async_server; + if (enable_async_server) + flash_service = std::make_unique(security_config, context); + else + flash_service = std::make_unique(security_config, context); + diagnostics_service = std::make_unique(context, config_); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, 5 * 1000)); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS, 10 * 1000)); + builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1)); + // number of grpc thread pool's non-temporary threads, better tune it up to avoid frequent creation/destruction of threads + auto max_grpc_pollers = context.getSettingsRef().max_grpc_pollers; + if (max_grpc_pollers > 0 && max_grpc_pollers <= std::numeric_limits::max()) + builder.SetSyncServerOption(grpc::ServerBuilder::SyncServerOption::MAX_POLLERS, max_grpc_pollers); + builder.RegisterService(flash_service.get()); + LOG_FMT_INFO(log, "Flash service registered"); + builder.RegisterService(diagnostics_service.get()); + LOG_FMT_INFO(log, "Diagnostics service registered"); + + /// Kick off grpc server. + // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error. 
+    builder.SetMaxReceiveMessageSize(-1);
+    builder.SetMaxSendMessageSize(-1);
+    thread_manager = DB::newThreadManager();
+    int async_cq_num = context.getSettingsRef().async_cqs;
+    if (enable_async_server)
+    {
+        for (int i = 0; i < async_cq_num; ++i)
+        {
+            cqs.emplace_back(builder.AddCompletionQueue());
+            notify_cqs.emplace_back(builder.AddCompletionQueue());
+        }
+    }
+    flash_grpc_server = builder.BuildAndStart();
+    if (!flash_grpc_server)
+    {
+        throw Exception("Exception happens when starting grpc server, the flash.service_addr may be invalid, flash.service_addr is " + raft_config.flash_server_addr, ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
+    }
+    LOG_FMT_INFO(log, "Flash grpc server listening on [{}]", raft_config.flash_server_addr);
+    Debug::setServiceAddr(raft_config.flash_server_addr);
+    if (enable_async_server)
+    {
+        int preallocated_request_count_per_poller = context.getSettingsRef().preallocated_request_count_per_poller;
+        int pollers_per_cq = context.getSettingsRef().async_pollers_per_cq;
+        for (int i = 0; i < async_cq_num * pollers_per_cq; ++i)
+        {
+            auto * cq = cqs[i / pollers_per_cq].get();
+            auto * notify_cq = notify_cqs[i / pollers_per_cq].get();
+            for (int j = 0; j < preallocated_request_count_per_poller; ++j)
+            {
+                // EstablishCallData will manage its own lifecycle.
+                EstablishCallData::spawn(assert_cast<AsyncFlashService *>(flash_service.get()), cq, notify_cq, is_shutdown);
+            }
+            thread_manager->schedule(false, "async_poller", [cq, this] { handleRpcs(cq, log); });
+            thread_manager->schedule(false, "async_poller", [notify_cq, this] { handleRpcs(notify_cq, log); });
+        }
+    }
+}
+
+FlashGrpcServerHolder::~FlashGrpcServerHolder()
+{
+    try
+    {
+        /// Shut down grpc server.
+        LOG_FMT_INFO(log, "Begin to shut down flash grpc server");
+        flash_grpc_server->Shutdown();
+        *is_shutdown = true;
+        // Wait for all existing MPPTunnels to finish to prevent crashes.
+        // Once all existing MPPTunnels are done, in almost all cases the existing MPPTasks and ExchangeReceivers are done as well.
+        const int max_wait_cnt = 300;
+        int wait_cnt = 0;
+        while (GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Value() >= 1 && (wait_cnt++ < max_wait_cnt))
+            std::this_thread::sleep_for(std::chrono::seconds(1));
+
+        for (auto & cq : cqs)
+            cq->Shutdown();
+        for (auto & cq : notify_cqs)
+            cq->Shutdown();
+        thread_manager->wait();
+        flash_grpc_server->Wait();
+        flash_grpc_server.reset();
+        if (GRPCCompletionQueuePool::global_instance)
+            GRPCCompletionQueuePool::global_instance->markShutdown();
+        LOG_FMT_INFO(log, "Shut down flash grpc server");
+
+        /// Close flash service.
+        LOG_FMT_INFO(log, "Begin to shut down flash service");
+        flash_service.reset();
+        LOG_FMT_INFO(log, "Shut down flash service");
+    }
+    catch (...)
+    {
+        auto message = getCurrentExceptionMessage(false);
+        LOG_FMT_FATAL(log, "Exception happens in destructor of FlashGrpcServerHolder with message: {}", message);
+        std::terminate();
+    }
+}
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Server/FlashGrpcServerHolder.h b/dbms/src/Server/FlashGrpcServerHolder.h
new file mode 100644
index 00000000000..81c50dc609b
--- /dev/null
+++ b/dbms/src/Server/FlashGrpcServerHolder.h
@@ -0,0 +1,48 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+class FlashGrpcServerHolder
+{
+public:
+    FlashGrpcServerHolder(
+        Context & context,
+        Poco::Util::LayeredConfiguration & config_,
+        TiFlashSecurityConfig & security_config,
+        const TiFlashRaftConfig & raft_config,
+        const LoggerPtr & log_);
+    ~FlashGrpcServerHolder();
+
+private:
+    const LoggerPtr & log;
+    std::shared_ptr<std::atomic<bool>> is_shutdown;
+    std::unique_ptr<FlashService> flash_service = nullptr;
+    std::unique_ptr<DiagnosticsService> diagnostics_service = nullptr;
+    std::unique_ptr<grpc::Server> flash_grpc_server = nullptr;
+    // cqs and notify_cqs are used for processing async grpc events (currently only EstablishMPPConnection).
+    std::vector<std::unique_ptr<grpc::ServerCompletionQueue>> cqs;
+    std::vector<std::unique_ptr<grpc::ServerCompletionQueue>> notify_cqs;
+    std::shared_ptr<ThreadManager> thread_manager;
+};
+
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 24b0dfd2a69..607b7e3e6c8 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -194,7 +194,6 @@ extern const int NO_ELEMENTS_IN_CONFIG;
 extern const int SUPPORT_IS_DISABLED;
 extern const int ARGUMENT_OUT_OF_BOUND;
 extern const int INVALID_CONFIG_PARAMETER;
-extern const int IP_ADDRESS_NOT_ALLOWED;
 } // namespace ErrorCodes
 
 namespace Debug
@@ -518,196 +517,6 @@ void initStores(Context & global_context, const LoggerPtr & log, bool lazily_ini
 }
 }
 
-void handleRpcs(grpc::ServerCompletionQueue * curcq, const LoggerPtr & log)
-{
-    GET_METRIC(tiflash_thread_count, type_total_rpc_async_worker).Increment();
-    SCOPE_EXIT({
-        GET_METRIC(tiflash_thread_count, type_total_rpc_async_worker).Decrement();
-    });
-    void * tag = nullptr; // uniquely identifies a request.
-    bool ok = false;
-    while (true)
-    {
-        String err_msg;
-        try
-        {
-            // Block waiting to read the next event from the completion queue. The
-            // event is uniquely identified by its tag, which in this case is the
-            // memory address of a EstablishCallData instance.
-            // The return value of Next should always be checked. This return value
-            // tells us whether there is any kind of event or cq is shutting down.
-            if (!curcq->Next(&tag, &ok))
-            {
-                LOG_FMT_INFO(grpc_log, "CQ is fully drained and shut down");
-                break;
-            }
-            GET_METRIC(tiflash_thread_count, type_active_rpc_async_worker).Increment();
-            SCOPE_EXIT({
-                GET_METRIC(tiflash_thread_count, type_active_rpc_async_worker).Decrement();
-            });
-            // If ok is false, it means server is shutdown.
-            // We need not log all not ok events, since the volumn is large which will pollute the content of log.
-            if (ok)
-                static_cast<EstablishCallData *>(tag)->proceed();
-            else
-                static_cast<EstablishCallData *>(tag)->cancel();
-        }
-        catch (Exception & e)
-        {
-            err_msg = e.displayText();
-            LOG_FMT_ERROR(log, "handleRpcs meets error: {} Stack Trace : {}", err_msg, e.getStackTrace().toString());
-        }
-        catch (pingcap::Exception & e)
-        {
-            err_msg = e.message();
-            LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg);
-        }
-        catch (std::exception & e)
-        {
-            err_msg = e.what();
-            LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg);
-        }
-        catch (...)
-        {
-            err_msg = "unrecovered error";
-            LOG_FMT_ERROR(log, "handleRpcs meets error: {}", err_msg);
-            throw;
-        }
-    }
-}
-
-class Server::FlashGrpcServerHolder
-{
-public:
-    FlashGrpcServerHolder(Server & server, const TiFlashRaftConfig & raft_config, const LoggerPtr & log_)
-        : log(log_)
-        , is_shutdown(std::make_shared<std::atomic<bool>>(false))
-    {
-        grpc::ServerBuilder builder;
-        if (server.security_config.has_tls_config)
-        {
-            grpc::SslServerCredentialsOptions server_cred(GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_AND_VERIFY);
-            auto options = server.security_config.readAndCacheSecurityInfo();
-            server_cred.pem_root_certs = options.pem_root_certs;
-            server_cred.pem_key_cert_pairs.push_back(
-                grpc::SslServerCredentialsOptions::PemKeyCertPair{options.pem_private_key, options.pem_cert_chain});
-            builder.AddListeningPort(raft_config.flash_server_addr, grpc::SslServerCredentials(server_cred));
-        }
-        else
-        {
-            builder.AddListeningPort(raft_config.flash_server_addr, grpc::InsecureServerCredentials());
-        }
-
-        /// Init and register flash service.
-        bool enable_async_server = server.context().getSettingsRef().enable_async_server;
-        if (enable_async_server)
-            flash_service = std::make_unique<AsyncFlashService>(server.securityConfig(), server.context());
-        else
-            flash_service = std::make_unique<FlashService>(server.securityConfig(), server.context());
-        diagnostics_service = std::make_unique<DiagnosticsService>(server);
-        builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, 5 * 1000));
-        builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS, 10 * 1000));
-        builder.SetOption(grpc::MakeChannelArgumentOption(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1));
-        // number of grpc thread pool's non-temporary threads, better tune it up to avoid frequent creation/destruction of threads
-        auto max_grpc_pollers = server.context().getSettingsRef().max_grpc_pollers;
-        if (max_grpc_pollers > 0 && max_grpc_pollers <= std::numeric_limits<int>::max())
-            builder.SetSyncServerOption(grpc::ServerBuilder::SyncServerOption::MAX_POLLERS, max_grpc_pollers);
-        builder.RegisterService(flash_service.get());
-        LOG_FMT_INFO(log, "Flash service registered");
-        builder.RegisterService(diagnostics_service.get());
-        LOG_FMT_INFO(log, "Diagnostics service registered");
-
-        /// Kick off grpc server.
-        // Prevent TiKV from throwing "Received message larger than max (4404462 vs. 4194304)" error.
-        builder.SetMaxReceiveMessageSize(-1);
-        builder.SetMaxSendMessageSize(-1);
-        thread_manager = DB::newThreadManager();
-        int async_cq_num = server.context().getSettingsRef().async_cqs;
-        if (enable_async_server)
-        {
-            for (int i = 0; i < async_cq_num; ++i)
-            {
-                cqs.emplace_back(builder.AddCompletionQueue());
-                notify_cqs.emplace_back(builder.AddCompletionQueue());
-            }
-        }
-        flash_grpc_server = builder.BuildAndStart();
-        if (!flash_grpc_server)
-        {
-            throw Exception("Exception happens when start grpc server, the flash.service_addr may be invalid, flash.service_addr is " + raft_config.flash_server_addr, ErrorCodes::IP_ADDRESS_NOT_ALLOWED);
-        }
-        LOG_FMT_INFO(log, "Flash grpc server listening on [{}]", raft_config.flash_server_addr);
-        Debug::setServiceAddr(raft_config.flash_server_addr);
-        if (enable_async_server)
-        {
-            int preallocated_request_count_per_poller = server.context().getSettingsRef().preallocated_request_count_per_poller;
-            int pollers_per_cq = server.context().getSettingsRef().async_pollers_per_cq;
-            for (int i = 0; i < async_cq_num * pollers_per_cq; ++i)
-            {
-                auto * cq = cqs[i / pollers_per_cq].get();
-                auto * notify_cq = notify_cqs[i / pollers_per_cq].get();
-                for (int j = 0; j < preallocated_request_count_per_poller; ++j)
-                {
-                    // EstablishCallData will handle its lifecycle by itself.
-                    EstablishCallData::spawn(assert_cast<AsyncFlashService *>(flash_service.get()), cq, notify_cq, is_shutdown);
-                }
-                thread_manager->schedule(false, "async_poller", [cq, this] { handleRpcs(cq, log); });
-                thread_manager->schedule(false, "async_poller", [notify_cq, this] { handleRpcs(notify_cq, log); });
-            }
-        }
-    }
-
-    ~FlashGrpcServerHolder()
-    {
-        try
-        {
-            /// Shut down grpc server.
-            LOG_FMT_INFO(log, "Begin to shut down flash grpc server");
-            flash_grpc_server->Shutdown();
-            *is_shutdown = true;
-            // Wait all existed MPPTunnels done to prevent crash.
-            // If all existed MPPTunnels are done, almost in all cases it means all existed MPPTasks and ExchangeReceivers are also done.
-            const int max_wait_cnt = 300;
-            int wait_cnt = 0;
-            while (GET_METRIC(tiflash_object_count, type_count_of_mpptunnel).Value() >= 1 && (wait_cnt++ < max_wait_cnt))
-                std::this_thread::sleep_for(std::chrono::seconds(1));
-
-            for (auto & cq : cqs)
-                cq->Shutdown();
-            for (auto & cq : notify_cqs)
-                cq->Shutdown();
-            thread_manager->wait();
-            flash_grpc_server->Wait();
-            flash_grpc_server.reset();
-            if (GRPCCompletionQueuePool::global_instance)
-                GRPCCompletionQueuePool::global_instance->markShutdown();
-            LOG_FMT_INFO(log, "Shut down flash grpc server");
-
-            /// Close flash service.
-            LOG_FMT_INFO(log, "Begin to shut down flash service");
-            flash_service.reset();
-            LOG_FMT_INFO(log, "Shut down flash service");
-        }
-        catch (...)
-        {
-            auto message = getCurrentExceptionMessage(false);
-            LOG_FMT_FATAL(log, "Exception happens in destructor of FlashGrpcServerHolder with message: {}", message);
-            std::terminate();
-        }
-    }
-
-private:
-    const LoggerPtr & log;
-    std::shared_ptr<std::atomic<bool>> is_shutdown;
-    std::unique_ptr<FlashService> flash_service = nullptr;
-    std::unique_ptr<DiagnosticsService> diagnostics_service = nullptr;
-    std::unique_ptr<grpc::Server> flash_grpc_server = nullptr;
-    // cqs and notify_cqs are used for processing async grpc events (currently only EstablishMPPConnection).
-    std::vector<std::unique_ptr<grpc::ServerCompletionQueue>> cqs;
-    std::vector<std::unique_ptr<grpc::ServerCompletionQueue>> notify_cqs;
-    std::shared_ptr<ThreadManager> thread_manager;
-};
-
 class Server::TcpHttpServersHolder
 {
 public:
@@ -1080,7 +889,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
         LOG_FMT_INFO(log, "TiFlashRaftProxyHelper is null, failed to get server info");
     }
 
-    // print necessary grpc log.
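+    // Route gRPC's gpr logging through TiFlash's "grpc" logger at debug verbosity.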
     grpc_log = Logger::get("grpc");
     gpr_set_log_verbosity(GPR_LOG_SEVERITY_DEBUG);
     gpr_set_log_function(&printGRPCLog);
@@ -1432,7 +1240,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
     }
 
     /// Then, startup grpc server to serve raft and/or flash services.
-    FlashGrpcServerHolder flash_grpc_server_holder(*this, raft_config, log);
+    FlashGrpcServerHolder flash_grpc_server_holder(this->context(), this->config(), this->security_config, raft_config, log);
     {
         TcpHttpServersHolder tcpHttpServersHolder(*this, settings, log);
diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h
index 07c5b955a92..9f083d298cf 100644
--- a/dbms/src/Server/Server.h
+++ b/dbms/src/Server/Server.h
@@ -14,6 +14,7 @@
 #pragma once
 
+#include <Server/FlashGrpcServerHolder.h>
 #include
 #include
 #include
@@ -72,7 +73,6 @@
     ServerInfo server_info;
 
-    class FlashGrpcServerHolder;
     class TcpHttpServersHolder;
 };
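
Taken together, the hunks above turn the gRPC server's lifecycle into an RAII object: constructing a FlashGrpcServerHolder builds and starts the server (plus its async completion-queue pollers), and destroying it runs the whole drain-and-shutdown sequence. A minimal usage sketch of that pattern follows; it assumes the TiFlash headers introduced above, and bootstrapServerEnv / waitForTerminationRequest are hypothetical stand-ins for the bootstrap work that Server::main() actually performs:

    #include <Server/FlashGrpcServerHolder.h>

    // Hypothetical stand-ins for the environment setup done by Server::main().
    DB::Context & bootstrapServerEnv();
    void waitForTerminationRequest();

    int runFlashGrpcServer(
        Poco::Util::LayeredConfiguration & config,
        DB::TiFlashSecurityConfig & security_config,
        const DB::TiFlashRaftConfig & raft_config,
        const DB::LoggerPtr & log)
    {
        DB::Context & context = bootstrapServerEnv();
        {
            // Constructor: builds the grpc::Server, registers FlashService and
            // DiagnosticsService, and spawns completion-queue pollers when the
            // async server is enabled.
            DB::FlashGrpcServerHolder holder(context, config, security_config, raft_config, log);
            waitForTerminationRequest();
        } // Destructor: Shutdown(), wait for MPP tunnels to drain, shut down the
          // completion queues, join poller threads, then release the services.
        return 0;
    }

The design choice worth noting is that the holder owns every resource it starts (services, completion queues, poller threads), so the shutdown ordering lives in exactly one place instead of being interleaved through Server::main().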