From 11b340570ac7cb7bf6dd1510f2cb0bfdea1a9729 Mon Sep 17 00:00:00 2001 From: Meng Xin Date: Wed, 16 Feb 2022 19:08:09 +0800 Subject: [PATCH 1/6] This is an automated cherry-pick of #4046 Signed-off-by: ti-chi-bot --- dbms/src/Common/MyTime.cpp | 71 ++++++++++++------- .../Functions/tests/gtest_tidb_conversion.cpp | 36 ++++++++++ 2 files changed, 83 insertions(+), 24 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 1613304bfe8..571690c058c 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -65,7 +65,8 @@ bool isValidSeperator(char c, int previous_parts) if (isPunctuation(c)) return true; - return previous_parts == 2 && (c == ' ' || c == 'T'); + // for https://github.com/pingcap/tics/issues/4036 + return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c)); } std::vector parseDateFormat(String format) @@ -515,8 +516,8 @@ Field parseMyDateTime(const String & str, int8_t fsp) bool truncated_or_incorrect = false; - // noAbsorb tests if can absorb FSP or TZ - auto noAbsorb = [](const std::vector & seps) { + // no_absorb tests if can absorb FSP or TZ + auto no_absorb = [](const std::vector & seps) { // if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed // or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can // not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because @@ -526,7 +527,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) if (!frac_str.empty()) { - if (!noAbsorb(seps)) + if (!no_absorb(seps)) { seps.push_back(frac_str); frac_str = ""; @@ -537,7 +538,11 @@ Field parseMyDateTime(const String & str, int8_t fsp) { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. +<<<<<<< HEAD if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) +======= + if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty()) @@ -562,51 +567,51 @@ Field parseMyDateTime(const String & str, int8_t fsp) { case 14: // YYYYMMDDHHMMSS { - std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT hhmmss = true; break; } case 12: // YYMMDDHHMMSS { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT year = adjustYear(year); hhmmss = true; break; } case 11: // YYMMDDHHMMS { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); //NOLINT year = adjustYear(year); hhmmss = true; break; } case 10: // YYMMDDHHMM { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); //NOLINT year = adjustYear(year); break; } case 9: // YYMMDDHHM { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); //NOLINT year = adjustYear(year); break; } case 8: // YYYYMMDD { - std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); + std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); //NOLINT break; } case 7: // YYMMDDH { - std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); + std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); //NOLINT year = adjustYear(year); break; } case 6: // YYMMDD case 5: // YYMMD { - std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); + std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); //NOLINT year = adjustYear(year); break; } @@ -630,18 +635,18 @@ Field parseMyDateTime(const String & str, int8_t fsp) case 1: case 2: { - ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); + ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT break; } case 3: case 4: { - ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); + ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT break; } default: { - ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); + ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT break; } } @@ -655,7 +660,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) } else { - truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); + truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); //NOLINT } } if (truncated_or_incorrect) @@ -1003,7 +1008,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const static auto is_leap_year = [](UInt16 _year) { return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0); }; - max_day = max_days_in_month[month - 1]; + max_day = max_days_in_month[month - 1]; // NOLINT if (month == 2 && is_leap_year(year)) { max_day = 29; @@ -1336,13 +1341,22 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; +<<<<<<< HEAD auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { +======= + auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; +<<<<<<< HEAD auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) +======= + auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1359,7 +1373,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) // hh size_t step = 0; int32_t hour = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || hour > 12 || hour == 0) @@ -1375,7 +1389,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t minute = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || minute > 59) @@ -1387,7 +1401,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t second = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || second > 59) @@ -1396,7 +1410,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += step; // move forward int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; // "AM"/"PM" must be parsed as a single element // "11:13:56a" is an invalid input for "%r". @@ -1440,13 +1454,22 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; +<<<<<<< HEAD auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { +======= + auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; +<<<<<<< HEAD auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) +======= + auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1463,7 +1486,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) // hh size_t step = 0; int32_t hour = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || hour > 23) @@ -1475,7 +1498,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t minute = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || minute > 59) @@ -1487,7 +1510,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return state; int32_t second = 0; - if (state = skipWhitespaces(); state != ParseState::NORMAL) + if (state = skip_whitespaces(); state != ParseState::NORMAL) return state; std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); if (step == 0 || second > 59) diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index 315fdaf5191..d17b2fafc82 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -172,5 +172,41 @@ try } CATCH +<<<<<<< HEAD } // namespace tests } // namespace DB +======= +// for https://github.com/pingcap/tics/issues/4036 +TEST_F(TestTidbConversion, castStringAsDateTime) +try +{ + auto input = std::vector{"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"}; + auto to_column = createConstColumn(1, "MyDateTime(6)"); + + // vector + auto from_column = createColumn(input); + UInt64 except_packed = MyDateTime(2012, 12, 12, 12, 12, 12, 0).toPackedUInt(); + auto vector_result = executeFunction("tidb_cast", {from_column, to_column}); + for (size_t i = 0; i < input.size(); i++) + { + ASSERT_EQ(except_packed, vector_result.column.get()->get64(i)); + } + + // const + auto const_from_column = createConstColumn(1, "2012-12-12\n12:12:12"); + auto const_result = executeFunction("tidb_cast", {from_column, to_column}); + ASSERT_EQ(except_packed, const_result.column.get()->get64(0)); + + // nullable + auto nullable_from_column = createColumn>({"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"}); + auto nullable_result = executeFunction("tidb_cast", {from_column, to_column}); + for (size_t i = 0; i < input.size(); i++) + { + ASSERT_EQ(except_packed, nullable_result.column.get()->get64(i)); + } +} +CATCH + +} // namespace +} // namespace DB::tests +>>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) From ce33d63b8b33d9a83e056f2315c1cae495f409ed Mon Sep 17 00:00:00 2001 From: Meng Xin <363042881@qq.com> Date: Tue, 22 Feb 2022 11:28:59 +0800 Subject: [PATCH 2/6] Update MyTime.cpp solve conflict --- dbms/src/Common/MyTime.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 571690c058c..c784eff34cf 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -538,11 +538,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. -<<<<<<< HEAD if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) -======= - if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty()) @@ -1341,22 +1337,13 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; -<<<<<<< HEAD auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { -======= - auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; -<<<<<<< HEAD auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) -======= - auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { - if (skip_whitespaces() == ParseState::END_OF_FILE) ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1454,22 +1441,13 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; -<<<<<<< HEAD auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { -======= - auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; -<<<<<<< HEAD auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) -======= - auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { - if (skip_whitespaces() == ParseState::END_OF_FILE) ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') From e5ca26103d5313c0959168aea5b5c7d834132b4b Mon Sep 17 00:00:00 2001 From: Meng Xin <363042881@qq.com> Date: Tue, 22 Feb 2022 11:30:43 +0800 Subject: [PATCH 3/6] Update gtest_tidb_conversion.cpp solve conflict --- dbms/src/Functions/tests/gtest_tidb_conversion.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index d17b2fafc82..db205744707 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -172,10 +172,6 @@ try } CATCH -<<<<<<< HEAD -} // namespace tests -} // namespace DB -======= // for https://github.com/pingcap/tics/issues/4036 TEST_F(TestTidbConversion, castStringAsDateTime) try @@ -209,4 +205,3 @@ CATCH } // namespace } // namespace DB::tests ->>>>>>> 745bcce2a5 (fix date format identifies '\n' as invalid separator (#4046)) From 4dc7e919adf09d8544323b95178059808f98339b Mon Sep 17 00:00:00 2001 From: Meng Xin <363042881@qq.com> Date: Tue, 22 Feb 2022 11:32:57 +0800 Subject: [PATCH 4/6] Update MyTime.cpp pass lint --- dbms/src/Common/MyTime.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index c784eff34cf..d699d57f533 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1337,13 +1337,13 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { - if (skipWhitespaces() == ParseState::END_OF_FILE) + auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') @@ -1441,13 +1441,13 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; return checkIfEnd(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { - if (skipWhitespaces() == ParseState::END_OF_FILE) + auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState { + if (skip_whitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" if (ctx.view.data[temp_pos] != ':') From 42a69c099a666c4fbbe114dafff3793f6a7ed422 Mon Sep 17 00:00:00 2001 From: Meng Xin <363042881@qq.com> Date: Tue, 22 Feb 2022 11:40:35 +0800 Subject: [PATCH 5/6] Update gtest_tidb_conversion.cpp format --- dbms/src/Functions/tests/gtest_tidb_conversion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index db205744707..d64f1b8c1cb 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -203,5 +203,5 @@ try } CATCH -} // namespace -} // namespace DB::tests +} // namespace tests +} // namespace DB From 0aed7b5094667b9025338a4c22db37d6c15d2e61 Mon Sep 17 00:00:00 2001 From: Meng Xin <363042881@qq.com> Date: Tue, 22 Feb 2022 11:47:25 +0800 Subject: [PATCH 6/6] Update MyTime.cpp pass lint --- dbms/src/Common/MyTime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index d699d57f533..5589417576e 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -538,7 +538,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. - if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) + if (!no_absorb(seps) && !(tz_minute != "" && tz_sep == "")) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty())