From cf99103d921f673e90e92b54133664adb6e1c28f Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Fri, 22 Apr 2022 16:48:04 +0800 Subject: [PATCH] fix date format identifies '\n' as invalid separator (#4046) (#4060) close pingcap/tiflash#4036 --- dbms/src/Common/MyTime.cpp | 63 ++++++++++--------- .../Functions/tests/gtest_tidb_conversion.cpp | 31 +++++++++ 2 files changed, 63 insertions(+), 31 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 411f7c5bd14..279462c9875 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -65,7 +65,8 @@ bool isValidSeperator(char c, int previous_parts) if (isPunctuation(c)) return true; - return previous_parts == 2 && (c == ' ' || c == 'T'); + // for https://github.com/pingcap/tics/issues/4036 + return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c)); } std::vector parseDateFormat(String format) @@ -533,8 +534,8 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t bool truncated_or_incorrect = false; - // noAbsorb tests if can absorb FSP or TZ - auto noAbsorb = [](const std::vector & seps) { + // no_absorb tests if can absorb FSP or TZ + auto no_absorb = [](const std::vector & seps) { // if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed // or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can // not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because @@ -544,7 +545,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t if (!frac_str.empty()) { - if (!noAbsorb(seps)) + if (!no_absorb(seps)) { seps.push_back(frac_str); frac_str = ""; @@ -555,7 +556,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. - if (!noAbsorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) + if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty()) @@ -580,51 +581,51 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t { case 14: // YYYYMMDDHHMMSS { - std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT hhmmss = true; break; } case 12: // YYMMDDHHMMSS { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT year = adjustYear(year); hhmmss = true; break; } case 11: // YYMMDDHHMMS { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); //NOLINT year = adjustYear(year); hhmmss = true; break; } case 10: // YYMMDDHHMM { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); //NOLINT year = adjustYear(year); break; } case 9: // YYMMDDHHM { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); //NOLINT year = adjustYear(year); break; } case 8: // YYYYMMDD { - std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); + std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); //NOLINT break; } case 7: // YYMMDDH { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); //NOLINT year = adjustYear(year); break; } case 6: // YYMMDD case 5: // YYMMD { - std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); + std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); //NOLINT year = adjustYear(year); break; } @@ -649,18 +650,18 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t case 1: case 2: { - ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); + ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT break; } case 3: case 4: { - ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); + ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT break; } default: { - ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); + ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT break; } } @@ -674,7 +675,7 @@ std::pair parseMyDateTimeAndJudgeIsDate(const String & str, int8_t } else { - truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); + truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); //NOLINT } } if (truncated_or_incorrect) @@ -1048,7 +1049,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const static auto is_leap_year = [](UInt16 _year) { return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0); }; - max_day = max_days_in_month[month - 1]; + max_day = max_days_in_month[month - 1]; // NOLINT if (month == 2 && is_leap_year(year)) { max_day = 29; @@ -1379,13 +1380,13 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { + auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return check_if_end(); }; - auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { - if (skipWhitespaces() == ParseState::END_OF_FILE) + auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1402,7 +1403,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) // hh size_t step = 0; int32_t hour = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || hour > 12 || hour == 0) @@ -1418,7 +1419,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t minute = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || minute > 59) @@ -1430,7 +1431,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t second = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || second > 59) @@ -1439,7 +1440,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += step; // move forward int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; // "AM"/"PM" must be parsed as a single element // "11:13:56a" is an invalid input for "%r". @@ -1483,13 +1484,13 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { + auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return check_if_end(); }; - auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { - if (skipWhitespaces() == ParseState::END_OF_FILE) + auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1506,7 +1507,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) // hh size_t step = 0; int32_t hour = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || hour > 23) @@ -1518,7 +1519,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t minute = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || minute > 59) @@ -1530,7 +1531,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t second = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || second > 59) diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index fad33ea2db6..3f9abf6471f 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -940,5 +940,36 @@ try } CATCH +// for https://github.com/pingcap/tics/issues/4036 +TEST_F(TestTidbConversion, castStringAsDateTime) +try +{ + auto input = std::vector{"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"}; + auto to_column = createConstColumn(1, "MyDateTime(6)"); + + // vector + auto from_column = createColumn(input); + UInt64 except_packed = MyDateTime(2012, 12, 12, 12, 12, 12, 0).toPackedUInt(); + auto vector_result = executeFunction("tidb_cast", {from_column, to_column}); + for (size_t i = 0; i < input.size(); i++) + { + ASSERT_EQ(except_packed, vector_result.column.get()->get64(i)); + } + + // const + auto const_from_column = createConstColumn(1, "2012-12-12\n12:12:12"); + auto const_result = executeFunction("tidb_cast", {from_column, to_column}); + ASSERT_EQ(except_packed, const_result.column.get()->get64(0)); + + // nullable + auto nullable_from_column = createColumn>({"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"}); + auto nullable_result = executeFunction("tidb_cast", {from_column, to_column}); + for (size_t i = 0; i < input.size(); i++) + { + ASSERT_EQ(except_packed, nullable_result.column.get()->get64(i)); + } +} +CATCH + } // namespace } // namespace DB::tests