From 0dd279141c1579cf8166ebc07d33f78b8b95bdff Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Thu, 11 Aug 2022 16:38:48 +0800 Subject: [PATCH] fix: error data input for date(CAST(value AS DATETIME)) causing high TiFlash sys CPU (#5477) close pingcap/tiflash#5097 --- dbms/src/Common/MyTime.cpp | 62 ++++--- dbms/src/Common/MyTime.h | 11 +- dbms/src/Common/tests/gtest_mytime.cpp | 2 +- dbms/src/Functions/FunctionsTiDBConversion.h | 152 +++++++++--------- .../Functions/tests/gtest_tidb_conversion.cpp | 67 +++++--- tests/fullstack-test/expr/return_warning.test | 1 + 6 files changed, 159 insertions(+), 136 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index d0425364d0d..52da860bd7a 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -56,14 +56,23 @@ int32_t adjustYear(int32_t year) return year; } -void scanTimeArgs(const std::vector & seps, std::initializer_list && list) +bool scanTimeArgs(const std::vector & seps, std::initializer_list && list) { int i = 0; - for (auto * ptr : list) + try { - *ptr = std::stoi(seps[i]); - i++; + for (auto * ptr : list) + { + *ptr = std::stoi(seps[i]); + i++; + } + } + catch (std::exception & e) + { + return false; } + + return true; } // find index of fractional point. @@ -669,7 +678,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } default: { - throw TiFlashException("Wrong datetime format: " + str, Errors::Types::WrongValue); + return {Field(), is_date}; } } if (l == 5 || l == 6 || l == 8) @@ -719,40 +728,44 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } if (truncated_or_incorrect) { - throw TiFlashException("Datetime truncated: " + str, Errors::Types::Truncated); + return {Field(), is_date}; } break; } case 3: { // YYYY-MM-DD - scanTimeArgs(seps, {&year, &month, &day}); + if (!scanTimeArgs(seps, {&year, &month, &day})) + return {Field(), is_date}; is_date = true; break; } case 4: { // YYYY-MM-DD HH - scanTimeArgs(seps, {&year, &month, &day, &hour}); + if (!scanTimeArgs(seps, {&year, &month, &day, &hour})) + return {Field(), is_date}; break; } case 5: { // YYYY-MM-DD HH-MM - scanTimeArgs(seps, {&year, &month, &day, &hour, &minute}); + if (!scanTimeArgs(seps, {&year, &month, &day, &hour, &minute})) + return {Field(), is_date}; break; } case 6: { // We don't have fractional seconds part. // YYYY-MM-DD HH-MM-SS - scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); + if (!scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second})) + return {Field(), is_date}; hhmmss = true; break; } default: { - throw Exception("Wrong datetime format"); + return {Field(), is_date}; } } @@ -809,7 +822,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t if (needCheckTimeValid && !checkTimeValid(year, month, day, hour, minute, second)) { - throw Exception("Wrong datetime format"); + return {Field(), is_date}; } MyDateTime result(year, month, day, hour, minute, second, micro_second); @@ -818,7 +831,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t { if (!hhmmss) { - throw TiFlashException("Invalid datetime value: " + str, Errors::Types::WrongValue); + return {Field(), is_date}; } if (!tz_hour.empty()) { @@ -832,7 +845,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t if (delta_hour > 14 || delta_minute > 59 || (delta_hour == 14 && delta_minute != 0) || (tz_sign == "-" && delta_hour == 0 && delta_minute == 0)) { - throw TiFlashException("Invalid datetime value: " + str, Errors::Types::WrongValue); + return {Field(), is_date}; } // by default, if the temporal string literal does not contain timezone information, it will be in the timezone // specified by the time_zone system variable. However, if the timezone is specified in the string literal, we @@ -1074,21 +1087,19 @@ size_t maxFormattedDateTimeStringLength(const String & format) return std::max(result, 1); } -void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const +bool MyTimeBase::isValid(bool allow_zero_in_date, bool allow_invalid_date) const { if (!(year == 0 && month == 0 && day == 0)) { if (!allow_zero_in_date && (month == 0 || day == 0)) { - throw TiFlashException( - fmt::format("Incorrect datetime value: {0:04d}-{1:02d}-{2:02d}", year, month, day), - Errors::Types::WrongValue); + return false; } } if (year >= 9999 || month > 12) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return false; } UInt8 max_day = 31; @@ -1096,7 +1107,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const { if (month < 1) { - throw TiFlashException(fmt::format("Incorrect time value: month {}", month), Errors::Types::WrongValue); + return false; } constexpr static UInt8 max_days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; static auto is_leap_year = [](UInt16 _year) { @@ -1110,23 +1121,22 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const } if (day > max_day) { - throw TiFlashException( - fmt::format("Incorrect datetime value: {0:04d}-{1:02d}-{2:02d}", year, month, day), - Errors::Types::WrongValue); + return false; } if (hour < 0 || hour >= 24) { - throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue); + return false; } if (minute >= 60) { - throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue); + return false; } if (second >= 60) { - throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue); + return false; } + return true; } bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute, const UInt64 & second, const UInt64 & microsecond, MyDateTime & result) diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 8f8ffb85df3..60dafba4821 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -113,8 +113,8 @@ struct MyTimeBase std::tuple calcWeek(UInt32 mode) const; // Check validity of time under specified SQL_MODE. - // May throw exception. - void check(bool allow_zero_in_date, bool allow_invalid_date) const; + // return false if time is invalid + bool isValid(bool allow_zero_in_date, bool allow_invalid_date) const; }; struct MyDateTime : public MyTimeBase @@ -181,8 +181,11 @@ struct MyDateTimeParser std::vector parsers; }; -Field parseMyDateTime(const String & str, int8_t fsp = 6, bool needCheckTimeValid = false); -std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp = 6, bool needCheckTimeValid = false); +static int8_t default_fsp = 6; +static bool default_needCheckTimeValid = false; + +Field parseMyDateTime(const String & str, int8_t fsp = default_fsp, bool needCheckTimeValid = default_needCheckTimeValid); +std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t fsp = default_fsp, bool needCheckTimeValid = default_needCheckTimeValid); void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to, bool throw_exception = false); diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index f84454248ff..6bb574ea817 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -76,7 +76,7 @@ class TestMyTime : public testing::Test if (expect_error) { MyDateTime datetime(0, 0, 0, 0, 0, 0, 0); - EXPECT_THROW({ numberToDateTime(input, datetime, ctx); }, TiFlashException) << "Original time number: " << input; + EXPECT_TRUE(numberToDateTime(input, datetime, ctx)); return; } diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index bcd7856ee71..036b04fa8ca 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -1345,28 +1345,30 @@ struct TiDBConvertToTime size_t string_size = next_offset - current_offset - 1; StringRef string_ref(&(*chars)[current_offset], string_size); String string_value = string_ref.toString(); - try - { - Field packed_uint_value = parseMyDateTime(string_value, to_fsp); - UInt64 packed_uint = packed_uint_value.template safeGet(); - MyDateTime datetime(packed_uint); - if constexpr (std::is_same_v) - { - MyDate date(datetime.year, datetime.month, datetime.day); - vec_to[i] = date.toPackedUInt(); - } - else - { - vec_to[i] = packed_uint; - } - } - catch (const Exception &) + + Field packed_uint_value = parseMyDateTime(string_value, to_fsp); + + if (packed_uint_value.isNull()) { // Fill NULL if cannot parse (*vec_null_map_to)[i] = 1; vec_to[i] = 0; - handleInvalidTime(context, string_value); + current_offset = next_offset; + continue; + } + + UInt64 packed_uint = packed_uint_value.template safeGet(); + MyDateTime datetime(packed_uint); + if constexpr (std::is_same_v) + { + MyDate date(datetime.year, datetime.month, datetime.day); + vec_to[i] = date.toPackedUInt(); + } + else + { + vec_to[i] = packed_uint; } + current_offset = next_offset; } } @@ -1427,28 +1429,26 @@ struct TiDBConvertToTime for (size_t i = 0; i < size; ++i) { - try - { - MyDateTime datetime(0, 0, 0, 0, 0, 0, 0); - bool is_null = numberToDateTime(vec_from[i], datetime, context.getDAGContext()); - if constexpr (std::is_same_v) - { - MyDate date(datetime.year, datetime.month, datetime.day); - vec_to[i] = date.toPackedUInt(); - } - else - { - vec_to[i] = datetime.toPackedUInt(); - } - (*vec_null_map_to)[i] = is_null; - } - catch (const TiFlashException & e) + MyDateTime datetime(0, 0, 0, 0, 0, 0, 0); + bool is_null = numberToDateTime(vec_from[i], datetime, context.getDAGContext()); + + if (is_null) { - // Cannot cast, fill with NULL (*vec_null_map_to)[i] = 1; vec_to[i] = 0; - handleInvalidTime(context, vec_from[i]); + continue; + } + + if constexpr (std::is_same_v) + { + MyDate date(datetime.year, datetime.month, datetime.day); + vec_to[i] = date.toPackedUInt(); + } + else + { + vec_to[i] = datetime.toPackedUInt(); } + (*vec_null_map_to)[i] = is_null; } } else if constexpr (std::is_floating_point_v) @@ -1474,34 +1474,26 @@ struct TiDBConvertToTime } else { - try - { - Field packed_uint_value = parseMyDateTime(value_str, to_fsp); - UInt64 packed_uint = packed_uint_value.template safeGet(); - MyDateTime datetime(packed_uint); - if constexpr (std::is_same_v) - { - MyDate date(datetime.year, datetime.month, datetime.day); - vec_to[i] = date.toPackedUInt(); - } - else - { - vec_to[i] = packed_uint; - } - } - catch (const Exception &) + Field packed_uint_value = parseMyDateTime(value_str, to_fsp); + + if (packed_uint_value.isNull()) { // Fill NULL if cannot parse (*vec_null_map_to)[i] = 1; vec_to[i] = 0; - handleInvalidTime(context, value_str); + continue; } - catch (const std::exception &) + + UInt64 packed_uint = packed_uint_value.template safeGet(); + MyDateTime datetime(packed_uint); + if constexpr (std::is_same_v) { - // Fill NULL if cannot parse - (*vec_null_map_to)[i] = 1; - vec_to[i] = 0; - handleInvalidTime(context, value_str); + MyDate date(datetime.year, datetime.month, datetime.day); + vec_to[i] = date.toPackedUInt(); + } + else + { + vec_to[i] = packed_uint; } } } @@ -1515,25 +1507,24 @@ struct TiDBConvertToTime for (size_t i = 0; i < size; i++) { String value_str = vec_from[i].toString(type.getScale()); - try - { - Field value = parseMyDateTime(value_str, to_fsp); - MyDateTime datetime(value.template safeGet()); - if constexpr (std::is_same_v) - { - MyDate date(datetime.year, datetime.month, datetime.day); - vec_to[i] = date.toPackedUInt(); - } - else - { - vec_to[i] = datetime.toPackedUInt(); - } - } - catch (const Exception &) + Field value = parseMyDateTime(value_str, to_fsp); + + if (value.getType() == Field::Types::Null) { (*vec_null_map_to)[i] = 1; vec_to[i] = 0; - handleInvalidTime(context, value_str); + continue; + } + + MyDateTime datetime(value.template safeGet()); + if constexpr (std::is_same_v) + { + MyDate date(datetime.year, datetime.month, datetime.day); + vec_to[i] = date.toPackedUInt(); + } + else + { + vec_to[i] = datetime.toPackedUInt(); } } } @@ -1634,6 +1625,7 @@ struct TiDBConvertToDuration } }; +// Return true if the time is invalid. inline bool getDatetime(const Int64 & num, MyDateTime & result, DAGContext * ctx) { UInt64 ymd = num / 1000000; @@ -1651,15 +1643,15 @@ inline bool getDatetime(const Int64 & num, MyDateTime & result, DAGContext * ctx if (toCoreTimeChecked(year, month, day, hour, minute, second, 0, result)) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return true; } if (ctx) { - result.check(ctx->allowZeroInDate(), ctx->allowInvalidDate()); + return !result.isValid(ctx->allowZeroInDate(), ctx->allowInvalidDate()); } else { - result.check(false, false); + return !result.isValid(false, false); } return false; } @@ -1685,7 +1677,7 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx // check MMDD if (number < 101) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return true; } // check YYMMDD: 2000-2069 @@ -1697,7 +1689,7 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx if (number < 70 * 10000 + 101) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return true; } // check YYMMDD @@ -1717,7 +1709,7 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx // check MMDDHHMMSS if (number < 101000000) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return true; } // check YYMMDDhhmmss: 2000-2069 @@ -1730,7 +1722,7 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx // check YYYYMMDDhhmmss if (number < 70 * 10000000000 + 101000000) { - throw TiFlashException("Incorrect time value", Errors::Types::WrongValue); + return true; } // check YYMMDDHHMMSS diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index 5f885c2716f..387d479c680 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -21,11 +22,11 @@ #include #include #include +#include #include #include #include - namespace DB::tests { namespace @@ -162,6 +163,21 @@ class TestTidbConversion : public DB::tests::FunctionTest inner_test(false); } + template + typename std::enable_if, void>::type testReturnNull(const Input & input, int fraction) + { + auto inner_test = [&](bool is_const) { + ASSERT_COLUMN_EQ( + is_const ? createDateTimeColumnConst(1, {}, fraction) : createDateTimeColumn({{}}, fraction), + executeFunction( + func_name, + {is_const ? createConstColumn>(1, input) : createColumn>({input}), + createCastTypeConstColumn(fmt::format("Nullable(MyDateTime({}))", fraction))})); + }; + inner_test(true); + inner_test(false); + } + template void testOnlyNull() { @@ -923,31 +939,32 @@ try executeFunction(func_name, {createColumn>({{}, 20211026160859}), createCastTypeConstColumn("Nullable(MyDateTime(6))")})); - ASSERT_THROW( + + ASSERT_COLUMN_EQ( + createDateTimeColumn({{}}, 6), executeFunction(func_name, {createColumn>({MAX_UINT8}), - createCastTypeConstColumn("Nullable(MyDateTime(6))")}), - TiFlashException); - ASSERT_THROW( + createCastTypeConstColumn("Nullable(MyDateTime(6))")})); + ASSERT_COLUMN_EQ( + createDateTimeColumn({{}}, 6), executeFunction(func_name, {createColumn>({MAX_UINT16}), - createCastTypeConstColumn("Nullable(MyDateTime(6))")}), - TiFlashException); - ASSERT_THROW( + createCastTypeConstColumn("Nullable(MyDateTime(6))")})); + ASSERT_COLUMN_EQ( + createDateTimeColumn({{}}, 6), executeFunction(func_name, {createColumn>({MAX_UINT32}), - createCastTypeConstColumn("Nullable(MyDateTime(6))")}), - TiFlashException); + createCastTypeConstColumn("Nullable(MyDateTime(6))")})); ASSERT_COLUMN_EQ( createDateTimeColumn({{}}, 6), executeFunction(func_name, {createColumn>({0}), createCastTypeConstColumn("Nullable(MyDateTime(6))")})); - ASSERT_THROW( + ASSERT_COLUMN_EQ( + createDateTimeColumn({{}, {}}, 6), executeFunction(func_name, {createColumn>({{}, -20211026160859}), - createCastTypeConstColumn("Nullable(MyDateTime(6))")}), - TiFlashException); + createCastTypeConstColumn("Nullable(MyDateTime(6))")})); } CATCH @@ -1195,15 +1212,15 @@ try // mysql: null, warning. // tiflash: null, no warning. // tidb: 0000-00-00 00:00:00 - // testThrowException(0, 6); - testThrowException(12.213, 6); - testThrowException(-12.213, 6); - testThrowException(MAX_FLOAT32, 6); - testThrowException(MIN_FLOAT32, 6); + testReturnNull(0, 6); + testReturnNull(12.213, 6); + testReturnNull(-12.213, 6); + testReturnNull(MAX_FLOAT32, 6); + testReturnNull(MIN_FLOAT32, 6); // mysql: 2000-01-11 00:00:00 // tiflash / tidb: null, warnings // testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); - testThrowException(-111, 6); + testReturnNull(-111, 6); // mysql: 2000-01-11 00:00:00 // tiflash / tidb: null, warnings // testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); @@ -1211,15 +1228,15 @@ try // mysql: null, warning. // tiflash: null, no warning. // tidb: 0000-00-00 00:00:00 - // testThrowException(0, 6); - testThrowException(12.213, 6); - testThrowException(-12.213, 6); - testThrowException(MAX_FLOAT64, 6); - testThrowException(MIN_FLOAT64, 6); + // testReturnNull(0, 6); + testReturnNull(12.213, 6); + testReturnNull(-12.213, 6); + testReturnNull(MAX_FLOAT64, 6); + testReturnNull(MIN_FLOAT64, 6); // mysql: 2000-01-11 00:00:00 // tiflash / tidb: null, warnings // testNotOnlyNull(111, {2000, 1, 11, 0, 0, 0, 0}, 6); - testThrowException(-111, 6); + testReturnNull(-111, 6); // mysql: 2000-01-11 00:00:00 // tiflash / tidb: null, warnings // testNotOnlyNull(111.1, {2000, 1, 11, 0, 0, 0, 0}, 6); diff --git a/tests/fullstack-test/expr/return_warning.test b/tests/fullstack-test/expr/return_warning.test index 02c0ec4e9b3..8eecf603bbe 100644 --- a/tests/fullstack-test/expr/return_warning.test +++ b/tests/fullstack-test/expr/return_warning.test @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +#RETURN mysql> drop table if exists test.t mysql> create table test.t(a int)