From afd8901c9c64d3284fd823690157eb42ffacd0c5 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Sat, 22 May 2021 19:31:56 +0800 Subject: [PATCH 1/5] squash all commit of str_to_date Signed-off-by: JaySon-Huang --- dbms/src/Common/MyTime.cpp | 725 ++++++++++++++++++- dbms/src/Common/MyTime.h | 19 + dbms/src/Common/StringUtils/StringRefUtils.h | 30 + dbms/src/Common/StringUtils/StringUtils.cpp | 36 +- dbms/src/Common/StringUtils/StringUtils.h | 19 +- dbms/src/Common/tests/gtest_mytime.cpp | 161 +++- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 6 +- dbms/src/Functions/FunctionHelpers.cpp | 30 +- dbms/src/Functions/FunctionsConversion.cpp | 2 + dbms/src/Functions/FunctionsConversion.h | 111 +++ tests/fullstack-test/expr/str_to_date.test | 67 ++ 11 files changed, 1176 insertions(+), 30 deletions(-) create mode 100644 dbms/src/Common/StringUtils/StringRefUtils.h create mode 100644 tests/fullstack-test/expr/str_to_date.test diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index b249d2db136..cfa3c314b27 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1,6 +1,10 @@ #include +#include +#include #include #include +#include +#include #include #include @@ -9,7 +13,15 @@ namespace DB { -int adjustYear(int year) +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int NOT_IMPLEMENTED; +} // namespace ErrorCodes + +// adjustYear adjusts year according to y. 
+// See https://dev.mysql.com/doc/refman/5.7/en/two-digit-years.html +int32_t adjustYear(int32_t year) { if (year >= 0 && year <= 69) return 2000 + year; @@ -1217,4 +1229,715 @@ MyDateTimeFormatter::MyDateTimeFormatter(const String & layout) } } +struct MyDateTimeParser::Context +{ + // Some state for `mysqlTimeFix` + uint32_t state = 0; + static constexpr uint32_t ST_DAY_OF_YEAR = 0x01; + static constexpr uint32_t ST_MERIDIEM = 0x02; + static constexpr uint32_t ST_HOUR_0_23 = 0x04; + static constexpr uint32_t ST_HOUR_1_12 = 0x08; + + int32_t day_of_year = 0; + // 0 - invalid, 1 - am, 2 - pm + int32_t meridiem = 0; + + // The input string view + const StringRef view; + // The pos we are parsing from + size_t pos = 0; + + Context(StringRef view_) : view(std::move(view_)) {} +}; + +// Try to parse digits with number of `limit` starting from view[pos] +// Return if success. +// Return <0, _> if fail. +static std::tuple parseNDigits(const StringRef & view, const size_t pos, const size_t limit) +{ + size_t step = 0; + int32_t num = 0; + while (step < limit && (pos + step) < view.size && isNumericASCII(view.data[pos + step])) + { + num = num * 10 + (view.data[pos + step] - '0'); + step += 1; + } + return std::make_tuple(step, num); +} + +static std::tuple parseYearNDigits(const StringRef & view, const size_t pos, const size_t limit) +{ + int32_t effective_count = 0; + int32_t effective_value = 0; + while (static_cast(effective_count + 1) <= limit) + { + auto [step, num] = parseNDigits(view, pos, effective_count + 1); + if (step == 0) + break; + effective_count++; + effective_value = num; + } + if (effective_count == 0) + return std::make_tuple(effective_count, 0); + else if (effective_count <= 2) + effective_value = adjustYear(effective_value); + return std::make_tuple(effective_count, effective_value); +} + +enum class ParseState +{ + NORMAL = 0, // Parsing + FAIL = 1, // Fail to parse + END_OF_FILE = 2, // The end of input +}; + +//"%r": Time, 12-hour (hh:mm:ss 
followed by AM or PM) +static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) +{ + // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure + size_t temp_pos = ctx.pos; + auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + // To the end + if (temp_pos == ctx.view.size) + return ParseState::END_OF_FILE; + return ParseState::NORMAL; + }; + auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) + ++temp_pos; + return checkIfEnd(); + }; + auto parseSep = [&temp_pos, &ctx, &checkIfEnd, &skipWhitespaces]() -> ParseState { + if (skipWhitespaces() == ParseState::END_OF_FILE) + return ParseState::END_OF_FILE; + // parse ":" + if (ctx.view.data[temp_pos] != ':') + return ParseState::FAIL; + temp_pos += 1; // move forward + return ParseState::NORMAL; + }; + auto tryParse = [&]() -> ParseState { + ParseState state = ParseState::NORMAL; + /// Note that we should update `time` as soon as possible, or we + /// can not get correct result for incomplete input like "12:13" + /// that is less than "hh:mm:ssAM" + + // hh + size_t step = 0; + int32_t hour = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || hour > 12 || hour == 0) + return ParseState::FAIL; + // Handle special case: 12:34:56 AM -> 00:34:56 + // For PM, we will add 12 it later + if (hour == 12) + hour = 0; + time.hour = hour; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t minute = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || minute > 59) + return ParseState::FAIL; + time.minute = minute; + temp_pos += step; // move forward + + if (state = parseSep(); state != 
ParseState::NORMAL) + return state; + + int32_t second = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || second > 59) + return ParseState::FAIL; + time.second = second; + temp_pos += step; // move forward + + int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'a') + meridiem = 1; + else if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'p') + meridiem = 2; + temp_pos += 1; // move forward + + if (state = checkIfEnd(); state != ParseState::NORMAL) + return state; + if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) != 'm') + meridiem = 0; + switch (meridiem) + { + case 0: + return ParseState::FAIL; + case 1: + break; + case 2: + time.hour += 12; + break; + } + temp_pos += 1; // move forward + return ParseState::NORMAL; + }; + if (auto state = tryParse(); state == ParseState::FAIL) + return false; + // Other state, forward the `ctx.pos` and return true + ctx.pos = temp_pos; + return true; +} + +//"%T": Time, 24-hour (hh:mm:ss) +static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) +{ + // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure + size_t temp_pos = ctx.pos; + auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + // To the end + if (temp_pos == ctx.view.size) + return ParseState::END_OF_FILE; + return ParseState::NORMAL; + }; + auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) + ++temp_pos; + return checkIfEnd(); + }; + auto parseSep = [&temp_pos, &ctx, &checkIfEnd, &skipWhitespaces]() -> ParseState { + if (skipWhitespaces() == ParseState::END_OF_FILE) + return ParseState::END_OF_FILE; + // parse ":" + if (ctx.view.data[temp_pos] != ':') + return 
ParseState::FAIL; + temp_pos += 1; // move forward + return ParseState::NORMAL; + }; + auto tryParse = [&]() -> ParseState { + ParseState state = ParseState::NORMAL; + /// Note that we should update `time` as soon as possible, or we + /// can not get correct result for incomplete input like "12:13" + /// that is less than "hh:mm:ss" + + // hh + size_t step = 0; + int32_t hour = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || hour > 23) + return ParseState::FAIL; + time.hour = hour; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t minute = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || minute > 59) + return ParseState::FAIL; + time.minute = minute; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t second = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || second > 59) + return ParseState::FAIL; + time.second = second; + temp_pos += step; // move forward + + return ParseState::NORMAL; + }; + if (auto state = tryParse(); state == ParseState::FAIL) + return false; + // Other state, forward the `ctx.pos` and return true + ctx.pos = temp_pos; + return true; +} + +// Refer: https://github.com/pingcap/tidb/blob/v5.0.1/types/time.go#L2946 +MyDateTimeParser::MyDateTimeParser(const String & format_) : format(format_) +{ + // Ignore all prefix white spaces (TODO: handle unicode space?) 
+ size_t format_pos = 0; + while (format_pos < format.size() && isWhitespaceASCII(format[format_pos])) + format_pos++; + + bool in_pattern_match = false; + while (format_pos < format.size()) + { + char x = format[format_pos]; + if (in_pattern_match) + { + switch (x) + { + case 'b': + { + //"%b": Abbreviated month name (Jan..Dec) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + size_t step = 0; + auto v = removePrefix(ctx.view, ctx.pos); + for (size_t p = 0; p < 12; p++) + { + if (startsWithCI(v, abbrev_month_names[p])) + { + time.month = p + 1; + step = abbrev_month_names[p].size(); + break; + } + } + if (step == 0) + return false; + ctx.pos += step; + return true; + }); + break; + } + case 'c': + { + //"%c": Month, numeric (0..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + // To be compatible with TiDB & MySQL, first try to take two digit and parse it as `num` + auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); + // Then check whether num is valid month + // Note that 0 is valid when sql_mode does not contain NO_ZERO_IN_DATE,NO_ZERO_DATE + if (step == 0 || month > 12) + return false; + time.month = month; + ctx.pos += step; + return true; + }); + break; + } + case 'd': //"%d": Day of the month, numeric (00..31) + [[fallthrough]]; + case 'e': //"%e": Day of the month, numeric (0..31) + { + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, day] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || day > 31) + return false; + time.day = day; + ctx.pos += step; + return true; + }); + break; + } + case 'f': + { + //"%f": Microseconds (000000..999999) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, ms] = parseNDigits(ctx.view, ctx.pos, 6); + // Empty string is a valid input + if (step == 0) + { + time.micro_second = 0; + return true; + } + // The siffix '0' can be 
ignored. + // "9" means 900000 + while (ms > 0 && ms * 10 < 1000000) + { + ms *= 10; + } + time.micro_second = ms; + ctx.pos += step; + return true; + }); + break; + } + case 'k': + //"%k": Hour (0..23) + [[fallthrough]]; + case 'H': + { + //"%H": Hour (00..23) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, hour] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || hour > 23) + return false; + ctx.state |= MyDateTimeParser::Context::ST_HOUR_0_23; + time.hour = hour; + ctx.pos += step; + return true; + }); + break; + } + case 'l': + //"%l": Hour (1..12) + [[fallthrough]]; + case 'I': + //"%I": Hour (01..12) + [[fallthrough]]; + case 'h': + { + //"%h": Hour (01..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, hour] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || hour <= 0 || hour > 12) + return false; + ctx.state |= MyDateTimeParser::Context::ST_HOUR_1_12; + time.hour = hour; + ctx.pos += step; + return true; + }); + break; + } + case 'i': + { + //"%i": Minutes, numeric (00..59) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, num] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || num > 59) + return false; + time.minute = num; + ctx.pos += step; + return true; + }); + break; + } + case 'j': + { + //"%j": Day of year (001..366) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + auto [step, num] = parseNDigits(ctx.view, ctx.pos, 3); + if (step == 0 || num == 0 || num > 366) + return false; + ctx.state |= MyDateTimeParser::Context::ST_DAY_OF_YEAR; + ctx.day_of_year = num; + ctx.pos += step; + return true; + }); + break; + } + case 'M': + { + //"%M": Month name (January..December) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto v = removePrefix(ctx.view, ctx.pos); + size_t step = 0; + for (size_t p = 0; p < 
12; p++) + { + if (startsWithCI(v, month_names[p])) + { + time.month = p + 1; + step = month_names[p].size(); + break; + } + } + if (step == 0) + return false; + ctx.pos += step; + return true; + }); + break; + } + case 'm': + { + //"%m": Month, numeric (00..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || month > 12) + return false; + time.month = month; + ctx.pos += step; + return true; + }); + break; + } + case 'S': + //"%S": Seconds (00..59) + [[fallthrough]]; + case 's': + { + //"%s": Seconds (00..59) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, second] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || second > 59) + return false; + time.second = second; + ctx.pos += step; + return true; + }); + break; + } + case 'p': + { + //"%p": AM or PM + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // Check the offset that will visit + if (ctx.view.size - ctx.pos < 2) + return false; + + int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm + if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos]) == 'a') + meridiem = 1; + else if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos]) == 'p') + meridiem = 2; + + if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos + 1]) != 'm') + meridiem = 0; + + if (meridiem == 0) + return false; + + ctx.state |= MyDateTimeParser::Context::ST_MERIDIEM; + ctx.meridiem = meridiem; + ctx.pos += 2; + return true; + }); + break; + } + case 'r': + { + //"%r": Time, 12-hour (hh:mm:ss followed by AM or PM) + parsers.emplace_back(parseTime12Hour); + break; + } + case 'T': + { + //"%T": Time, 24-hour (hh:mm:ss) + parsers.emplace_back(parseTime24Hour); + break; + } + case 'Y': + { + //"%Y": Year, numeric, four digits + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, year] = parseYearNDigits(ctx.view, 
ctx.pos, 4); + if (step == 0) + return false; + time.year = year; + ctx.pos += step; + return true; + }); + break; + } + case 'y': + { + //"%y": Year, numeric, two digits. Deprecated since MySQL 5.7.5 + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, year] = parseYearNDigits(ctx.view, ctx.pos, 2); + if (step == 0) + return false; + time.year = year; + ctx.pos += step; + return true; + }); + break; + } + case '#': + { + //"%#": Skip all numbers + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII numeric the same with unicode numeric? + size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isNumericASCII(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '.': + { + //"%.": Skip all punctation characters + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII punctuation the same with unicode punctuation? + size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isPunctuation(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '@': + { + //"%@": Skip all alpha characters + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII alpha the same with unicode alpha? 
+ size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isAlphaASCII(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '%': + { + //"%%": A literal % character + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { +#if 0 + if (ctx.view.data[ctx.pos] != '%') + return false; + ctx.pos++; + return true; +#else + // FIXME: Ignored by now, both tidb 5.0.0 and mariadb 10.3.14 can not handle it + std::ignore = ctx; + return false; +#endif + }); + break; + } + default: + throw Exception( + "Unknown date format pattern, [format=" + format + "] [pattern=" + x + "] [pos=" + DB::toString(format_pos) + "]", + ErrorCodes::BAD_ARGUMENTS); + } + // end the state of pattern match + in_pattern_match = false; + // move format_pos forward + format_pos++; + continue; + } + + if (x == '%') + { + in_pattern_match = true; + // move format_pos forward + format_pos++; + } + else + { + // Ignore whitespace for literal forwarding (TODO: handle unicode space?) 
+ while (format_pos < format.size() && isWhitespaceASCII(format[format_pos])) + format_pos++; + // Move forward ctx.view with a sequence of literal `format[format_pos:span_end]` + size_t span_end = format_pos; + while (span_end < format.size() && format[span_end] != '%' && !isWhitespaceASCII(format[span_end])) + ++span_end; + const size_t span_size = span_end - format_pos; + if (span_size > 0) + { + StringRef format_view{format.data() + format_pos, span_size}; + parsers.emplace_back([format_view](MyDateTimeParser::Context & ctx, MyTimeBase &) { + assert(format_view.size > 0); + if (format_view.size == 1) + { + // Shortcut for only 1 char + if (ctx.view.data[ctx.pos] != format_view.data[0]) + return false; + ctx.pos += 1; + return true; + } + // Try best to match input as most literal as possible + auto v = removePrefix(ctx.view, ctx.pos); + size_t v_step = 0; + for (size_t format_step = 0; format_step < format_view.size; ++format_step) + { + // Ignore prefix whitespace for input + while (v_step < v.size && isWhitespaceASCII(v.data[v_step])) + ++v_step; + if (v_step == v.size) // To the end + break; + // Try to match literal + if (v.data[v_step] != format_view.data[format_step]) + return false; + ++v_step; + } + ctx.pos += v_step; + return true; + }); + } + // move format_pos forward + format_pos = span_end; + } + } +} + +bool mysqlTimeFix(const MyDateTimeParser::Context & ctx, MyTimeBase & my_time) +{ + // TODO: Implement the function that converts day of year to yy:mm:dd + if (ctx.state & MyDateTimeParser::Context::ST_DAY_OF_YEAR) + { + // %j Day of year (001..366) set + throw Exception("%j set, parsing day of year is not implemented", ErrorCodes::NOT_IMPLEMENTED); + } + + if (ctx.state & MyDateTimeParser::Context::ST_MERIDIEM) + { + // %H (00..23) set, should not set AM/PM + if (ctx.state & MyDateTimeParser::Context::ST_HOUR_0_23) + return false; + if (my_time.hour == 0) + return false; + if (my_time.hour == 12) + { + // 12 is a special hour. 
+ if (ctx.meridiem == 1) // AM + my_time.hour = 0; + else if (ctx.meridiem == 2) // PM + my_time.hour = 12; + return true; + } + if (ctx.meridiem == 2) // PM + my_time.hour += 12; + } + else + { + // %h (01..12) set + if ((ctx.state & MyDateTimeParser::Context::ST_HOUR_1_12) && my_time.hour == 12) + my_time.hour = 0; // why? + } + return true; +} + +std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_view) const +{ + MyTimeBase my_time{0, 0, 0, 0, 0, 0, 0}; + MyDateTimeParser::Context ctx(str_view); + + // TODO: can we return warnings to TiDB? + for (auto & f : parsers) + { + // Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?) + while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos])) + ctx.pos++; + // To the end of input, exit (successfully) even if there is more patterns to match + if (ctx.pos == ctx.view.size) + break; + + if (f(ctx, my_time) != true) + { +#ifndef NDEBUG + LOG_TRACE(&Logger::get("MyDateTimeParser"), + "parse error, [str=" << ctx.view.toString() << "] [format=" << format << "] [parse_pos=" << ctx.pos << "]"); +#endif + return std::nullopt; + } + + // `ctx.pos` > `ctx.view.size` after callback, must be something wrong + if (unlikely(ctx.pos > ctx.view.size)) + { + throw Exception(String(__PRETTY_FUNCTION__) + ": parse error, pos overflow. 
[str=" + ctx.view.toString() + "] [format=" + format + + "] [parse_pos=" + DB::toString(ctx.pos) + "] [size=" + DB::toString(ctx.view.size) + "]"); + } + } + // Extra characters at the end of date are ignored, but a warning should be reported at this case + // if (ctx.pos < ctx.view.size) {} + + // Handle the var in `ctx` + if (!mysqlTimeFix(ctx, my_time)) + return std::nullopt; + + return my_time.toPackedUInt(); +} + } // namespace DB diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 7dc8001b025..7543636176b 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -3,6 +3,7 @@ #include #include +struct StringRef; namespace DB { @@ -133,6 +134,24 @@ struct MyDateTimeFormatter } }; +struct MyDateTimeParser +{ + explicit MyDateTimeParser(const String & format_); + + std::optional parseAsPackedUInt(const StringRef & str_view) const; + + struct Context; + +private: + const String format; + + // Parsing method. Parse from ctx.view[ctx.pos]. + // If success, update `datetime`, `ctx` and return true. + // If fail, return false. 
+ using ParserCallback = std::function; + std::vector parsers; +}; + Field parseMyDateTime(const String & str, int8_t fsp = 6); void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to); diff --git a/dbms/src/Common/StringUtils/StringRefUtils.h b/dbms/src/Common/StringUtils/StringRefUtils.h new file mode 100644 index 00000000000..8b3910159b2 --- /dev/null +++ b/dbms/src/Common/StringUtils/StringRefUtils.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +inline bool startsWith(const StringRef & view, const StringRef & prefix) +{ + return detail::startsWith(view.data, view.size, prefix.data, prefix.size); +} + +// case insensitive version of startsWith +inline bool startsWithCI(const StringRef & view, const StringRef & prefix) +{ + return detail::startsWithCI(view.data, view.size, prefix.data, prefix.size); +} + +inline bool endsWith(const StringRef & view, const char * prefix) +{ + return detail::endsWith(view.data, view.size, prefix, strlen(prefix)); // +} + +// case insensitive version of endsWith +inline bool endsWithCI(const StringRef & view, const char * prefix) +{ + return detail::endsWithCI(view.data, view.size, prefix, strlen(prefix)); +} + +// n - number of characters to remove from the start of the view, +// The behavior is undefined if `n > view.size` +inline StringRef removePrefix(const StringRef & view, size_t n) { return StringRef{view.data + n, view.size - n}; } diff --git a/dbms/src/Common/StringUtils/StringUtils.cpp b/dbms/src/Common/StringUtils/StringUtils.cpp index 676b00ce9ac..bea95cef391 100644 --- a/dbms/src/Common/StringUtils/StringUtils.cpp +++ b/dbms/src/Common/StringUtils/StringUtils.cpp @@ -1,16 +1,44 @@ #include "StringUtils.h" +#include + namespace detail { -bool startsWith(const std::string & s, const char * prefix, size_t prefix_size) +bool startsWith(const char * s, size_t size, const char * prefix, size_t prefix_size) +{ + return size >= prefix_size && 0 == 
memcmp(s, prefix, prefix_size); +} + +bool endsWith(const char * s, size_t size, const char * suffix, size_t suffix_size) { - return s.size() >= prefix_size && 0 == memcmp(s.data(), prefix, prefix_size); + return size >= suffix_size && 0 == memcmp(s + size - suffix_size, suffix, suffix_size); } -bool endsWith(const std::string & s, const char * suffix, size_t suffix_size) +bool startsWithCI(const char * s, size_t size, const char * prefix, size_t prefix_size) { - return s.size() >= suffix_size && 0 == memcmp(s.data() + s.size() - suffix_size, suffix, suffix_size); + if (size < prefix_size) + return false; + // case insensitive compare + for (size_t i = 0; i < prefix_size; ++i) + { + if (std::tolower(s[i]) != std::tolower(prefix[i])) + return false; + } + return true; } +bool endsWithCI(const char * s, size_t size, const char * suffix, size_t suffix_size) +{ + if (size < suffix_size) + return false; + // case insensitive compare + for (size_t i = 0; i < suffix_size; ++i) + { + if (std::tolower(s[i]) != std::tolower(suffix[i])) + return false; + } + return true; } + +} // namespace detail diff --git a/dbms/src/Common/StringUtils/StringUtils.h b/dbms/src/Common/StringUtils/StringUtils.h index 229f2a0638a..6d2d58c674c 100644 --- a/dbms/src/Common/StringUtils/StringUtils.h +++ b/dbms/src/Common/StringUtils/StringUtils.h @@ -1,25 +1,30 @@ #pragma once #include +#include #include #include namespace detail { - bool startsWith(const std::string & s, const char * prefix, size_t prefix_size); - bool endsWith(const std::string & s, const char * suffix, size_t suffix_size); -} +bool startsWith(const char * s, size_t size, const char * prefix, size_t prefix_size); +bool endsWith(const char * s, size_t size, const char * suffix, size_t suffix_size); + +// case insensitive version +bool startsWithCI(const char * s, size_t size, const char * prefix, size_t prefix_size); +bool endsWithCI(const char * s, size_t size, const char * suffix, size_t suffix_size); +} // namespace detail 
inline bool startsWith(const std::string & s, const std::string & prefix) { - return detail::startsWith(s, prefix.data(), prefix.size()); + return detail::startsWith(s.data(), s.size(), prefix.data(), prefix.size()); } inline bool endsWith(const std::string & s, const std::string & suffix) { - return detail::endsWith(s, suffix.data(), suffix.size()); + return detail::endsWith(s.data(), s.size(), suffix.data(), suffix.size()); } @@ -27,12 +32,12 @@ inline bool endsWith(const std::string & s, const std::string & suffix) /// string that is known at compile time. inline bool startsWith(const std::string & s, const char * prefix) { - return detail::startsWith(s, prefix, strlen(prefix)); + return detail::startsWith(s.data(), s.size(), prefix, strlen(prefix)); } inline bool endsWith(const std::string & s, const char * suffix) { - return detail::endsWith(s, suffix, strlen(suffix)); + return detail::endsWith(s.data(), s.size(), suffix, strlen(suffix)); // } /// Given an integer, return the adequate suffix for diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 537e74fc522..3a363111f99 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include @@ -223,5 +223,164 @@ catch (Exception & e) GTEST_FAIL(); } +TEST_F(TestMyTime, Parser) +try +{ + std::vector>> cases{ + {" 2/Jun", "%d/%b/%Y", MyDateTime{0, 6, 2, 0, 0, 0, 0}}, // More patterns than input string + {" liter", "lit era l", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // More patterns than input string + // Test case for empty input + {" ", " ", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + {" ", "%d/%b/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + // Prefix white spaces should be ignored + {" 2/Jun/2019 ", "%d/%b/%Y", MyDateTime{2019, 6, 2, 0, 0, 0, 0}}, + {" 2/Jun/2019 ", " %d/%b/%Y", MyDateTime{2019, 6, 2, 0, 0, 0, 0}}, + // + {"31/May/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", 
MyDateTime{2016, 5, 31, 12, 34, 56, 123400}}, + {"31/may/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", MyDateTime{2016, 5, 31, 12, 34, 56, 123400}}, // case insensitive + {"31/mayy/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", std::nullopt}, // invalid %b + {"31/mey/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", std::nullopt}, // invalid %b + {"30/April/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 30, 12, 34, 56, 0}}, // empty %f is valid + {"30/april/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 30, 12, 34, 56, 0}}, // case insensitive + {"30/Apri/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", std::nullopt}, // invalid %M + {"30/Aprill/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", std::nullopt}, // invalid %M + {"30/Feb/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", + MyDateTime{2016, 2, 30, 12, 34, 56, 123400}}, // Feb 30th (not exist in actual) is valid for parsing (in mariadb) + {"31/April/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 31, 12, 34, 56, 0}}, // April 31th (not exist in actual) + {"01,5,2013 9", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 900000}}, + {"01,52013", "%d,%c%Y", std::nullopt}, // %c will try to parse '52' as month and fail + {"01,5,2013", "%d,%c,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"01,5,2013 ", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + + /// Test cases for AM/PM set + {"10:11:12 AM", "%H:%i:%S %p", std::nullopt}, // should not set %H %p at the same time + {"10:11:12 Am", "%h:%i:%S %p", MyDateTime(0, 0, 0, 10, 11, 12, 0)}, + {"10:11:12 A", "%h:%i:%S %p", std::nullopt}, // EOF while parsing "AM"/"PM" + {"00:11:12 AM", "%h:%i:%S %p", std::nullopt}, // should not happen: %p set, %h not set + {"11:12 AM", "%i:%S %p", std::nullopt}, // should not happen: %p set, %h not set + {"11:12 abcd", "%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}}, // without %p, %h not set is ok + {"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h + {"12:11:12 AP", "%h:%i:%S %p", std::nullopt}, 
// only AM/PM is valid + {"12:11:12 AM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 0, 11, 12, 0)}, + {"12:11:12 PM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 12, 11, 12, 0)}, + {"11:11:12 pM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 23, 11, 12, 0)}, + /// Special case for %h with 12 + {"12:11:23 ", "%h:%i:%S ", MyDateTime(0, 0, 0, 0, 11, 23, 0)}, + // For %% -- FIXME: Ignored by now, both tidb and mariadb 10.3.14 can not handle it + // {"01/Feb/2016 % 23:45:54", "%d/%b/%Y %% %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + // {"01/Feb/2016 %% 23:45:54", "%d/%b/%Y %%%% %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb/2016 % 23:45:54", "%d/%b/%Y %% %H:%i:%S", std::nullopt}, + {"01/Feb/2016 %% 23:45:54", "%d/%b/%Y %%%% %H:%i:%S", std::nullopt}, + + /// Test cases for %r + {" 04 :13:56 AM13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 4, 13, 56, 0}}, + {"13:13:56 AM13/5/2019", "%r", std::nullopt}, // hh = 13 with am is invalid + {"00:13:56 AM13/05/2019", "%r", std::nullopt}, // hh = 0 with am is invalid + {"00:13:56 pM13/05/2019", "%r", std::nullopt}, // hh = 0 with pm is invalid + {"12: 13:56 AM 13/05/2019", "%r%d/%c/%Y", MyDateTime{2019, 5, 13, 0, 13, 56, 0}}, + {"12:13 :56 pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, + {"11:13: 56pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, + {"11:13:56a", "%r", std::nullopt}, // EOF while parsing "AM"/"PM" + {"11:13", "%r", MyDateTime{0, 0, 0, 11, 13, 0, 0}}, + {"11:", "%r", MyDateTime{0, 0, 0, 11, 0, 0, 0}}, + {"12", "%r", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + + /// Test cases for %T + {" 4 :13:56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 4, 13, 56, 0}}, + {"23: 13:56 13/05/2019", "%T%d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, + {"12:13 :56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, + {"19:13: 56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 19, 13, 56, 0}}, + {"21:13", "%T", MyDateTime{0, 0, 0, 21, 13, 0, 0}}, + {"21:", "%T", 
MyDateTime{0, 0, 0, 21, 0, 0, 0}}, + + // mutiple chars between pattern + {"01/Feb/2016 abcdefg 23:45:54", "%d/%b/%Y abcdefg %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + // the number of whitespace between pattern and input doesn't matter + {"01/Feb/2016 abcdefg 23:45: 54", "%d/%b/%Y abcdefg %H :%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb/ 2016 abc defg 23:45:54", "%d/ %b/%Y abcdefg %H: %i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb /2016 ab cdefg 23: 45:54", "%d /%b/%Y abc defg %H:%i :%S", MyDateTime{2016, 2, 1, 23, 45, 54, 0}}, + + /// Cases collect from MySQL 8.0 document + {"01,5,2013", "%d,%m,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"May 1, 2013", "%M %d,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"a09:30:17", "a%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, + {"a09:30:17", "%h:%i:%s", std::nullopt}, + {"09:30:17a", "%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, + {"abc", "abc", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + {"9", "%m", MyDateTime{0, 9, 0, 0, 0, 0, 0}}, + {"9", "%s", MyDateTime{0, 0, 0, 0, 0, 9, 0}}, + // Range checking on the parts of date values is as described in Section 11.2.2, “The DATE, DATETIME, and TIMESTAMP Types”. This means, for example, that “zero” dates or dates with part values of 0 are permitted unless the SQL mode is set to disallow such values. 
+ {"00/00/0000", "%m/%d/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + {"04/31/2004", "%m/%d/%Y", MyDateTime{2004, 4, 31, 0, 0, 0, 0}}, + + /// Below cases are ported from TiDB + {"10/28/2011 9:46:29 pm", "%m/%d/%Y %l:%i:%s %p", MyDateTime(2011, 10, 28, 21, 46, 29, 0)}, + {"10/28/2011 9:46:29 Pm", "%m/%d/%Y %l:%i:%s %p", MyDateTime(2011, 10, 28, 21, 46, 29, 0)}, + {"2011/10/28 9:46:29 am", "%Y/%m/%d %l:%i:%s %p", MyDateTime(2011, 10, 28, 9, 46, 29, 0)}, + {"20161122165022", "%Y%m%d%H%i%s", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, + {"2016 11 22 16 50 22", "%Y%m%d%H%i%s", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, // fail, should ignore sep + {"16-50-22 2016 11 22", "%H-%i-%s%Y%m%d", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, // fail, should ignore sep + {"16-50 2016 11 22", "%H-%i-%s%Y%m%d", std::nullopt}, + {"15-01-2001 1:59:58.999", "%d-%m-%Y %I:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 59, 58, 999000)}, + {"15-01-2001 1:59:58.1", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 59, 58, 100000)}, + {"15-01-2001 1:59:58.", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 59, 58, 0)}, + {"15-01-2001 1:9:8.999", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 9, 8, 999000)}, + {"15-01-2001 1:9:8.999", "%d-%m-%Y %H:%i:%S.%f", MyDateTime(2001, 1, 15, 1, 9, 8, 999000)}, + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %H:%i:%S %p", std::nullopt}, // should not set %H %p at the same time + {"10:20:10AM", "%H:%i:%S%p", std::nullopt}, // should not set %H %p at the same time + // test %@(skip alpha), %#(skip number), %.(skip punct) + {"2020-10-10ABCD", "%Y-%m-%d%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-101234", "%Y-%m-%d%#", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10....", "%Y-%m-%d%.", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10.1", "%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"abcd2020-10-10.1", "%@%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"abcd-2020-10-10.1", "%@-%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 
0)}, + {"2020-10-10", "%Y-%m-%d%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10abcde123abcdef", "%Y-%m-%d%@%#", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + }; + auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f"); + size_t idx = 0; + for (const auto & [input, fmt, expected] : cases) + { + MyDateTimeParser parser(fmt); + auto packed = parser.parseAsPackedUInt(input); + if (expected == std::nullopt) + { + MyTimeBase actual_time; + String actual_str; + if (packed) + { + actual_time = MyTimeBase(*packed); + result_formatter.format(actual_time, actual_str); + } + EXPECT_FALSE((bool)packed) // + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [actual=" << actual_str << "]"; + } + else + { + MyTimeBase actual_time; + String actual_str, expect_str; + result_formatter.format(*expected, expect_str); + if (packed) + { + actual_time = MyTimeBase(*packed); + result_formatter.format(actual_time, actual_str); + EXPECT_EQ(*packed, expected->toPackedUInt()) + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [expect=" << expect_str << "] [actual=" << actual_str << "]"; + } + else + { + EXPECT_TRUE((bool)packed) // + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [expect=" << expect_str << "] [actual=]"; + } + } + idx++; + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 4f12f42806e..573bbd5e255 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -912,9 +912,9 @@ std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::TimestampLiteral, "cast"}, //{tipb::ScalarFuncSig::LastDay, "cast"}, - //{tipb::ScalarFuncSig::StrToDateDate, "cast"}, - //{tipb::ScalarFuncSig::StrToDateDatetime, "cast"}, - //{tipb::ScalarFuncSig::StrToDateDuration, "cast"}, + {tipb::ScalarFuncSig::StrToDateDate, "strToDateDate"}, + 
{tipb::ScalarFuncSig::StrToDateDatetime, "strToDateDatetime"}, + // {tipb::ScalarFuncSig::StrToDateDuration, "cast"}, {tipb::ScalarFuncSig::FromUnixTime1Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::FromUnixTime2Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::ExtractDatetime, "extractMyDateTime"}, //{tipb::ScalarFuncSig::ExtractDuration, "cast"}, diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index c3a038867c2..cb060c6644a 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include #include +#include +#include #include +#include #include + #include "FunctionsArithmetic.h" @@ -18,8 +19,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co const ColumnConst * res = static_cast(column); - if (checkColumn(&res->getDataColumn()) - || checkColumn(&res->getDataColumn())) + if (checkColumn(&res->getDataColumn()) || checkColumn(&res->getDataColumn())) return res; return {}; @@ -67,13 +67,18 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un } else if (col.column->isColumnConst()) { - const auto & nested_col = static_cast( - static_cast(*col.column).getDataColumn()).getNestedColumnPtr(); + const auto & nested_col = static_cast( // + static_cast(*col.column).getDataColumn()) + .getNestedColumnPtr(); - res.insert({ ColumnConst::create(nested_col, rows), nested_type, col.name}); + res.insert({ColumnConst::create(nested_col, rows), nested_type, col.name}); } else - throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column for DataTypeNullable:" + col.type->getName() + " [column_name=" + col.name + + "] [created=" + DB::toString(bool(col.column)) + + "] [nullable=" + (col.column ? DB::toString(bool(col.column->isColumnNullable())) : "null") + + "] [const=" + (col.column ? 
DB::toString(bool(col.column->isColumnConst())) : "null") + "]", + ErrorCodes::ILLEGAL_COLUMN); } else res.insert(col); @@ -96,14 +101,11 @@ Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & ar return createBlockWithNestedColumnsImpl(block, args_set); } -bool functionIsInOperator(const String & name) -{ - return name == "in" || name == "notIn" || name == "tidbIn" || name == "tidbNotIn"; -} +bool functionIsInOperator(const String & name) { return name == "in" || name == "notIn" || name == "tidbIn" || name == "tidbNotIn"; } bool functionIsInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn" || name == "tidbIn" || name == "tidbNotIn"; } -} +} // namespace DB diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp index 0ef4369d4ac..69d18595f76 100644 --- a/dbms/src/Functions/FunctionsConversion.cpp +++ b/dbms/src/Functions/FunctionsConversion.cpp @@ -91,6 +91,8 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index d30420db2ad..e09f7b93280 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1588,7 +1588,118 @@ class FunctionDateFormat : public IFunction throw Exception("Second argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); } } +}; + +struct NameStrToDateDate +{ + static constexpr auto name = "strToDateDate"; +}; +struct NameStrToDateDatetime +{ + static constexpr auto name = "strToDateDatetime"; +}; +template +class FunctionStrToDate : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String 
getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + bool isInjective(const Block &) override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Function " + getName() + " only accept 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + // TODO: Maybe FixedString? + if (!removeNullable(arguments[0].type)->isString()) + throw Exception("First argument for function " + getName() + " must be String, but get " + arguments[0].type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + if (!arguments[1].type->isString()) + throw Exception( + "Second argument for function " + getName() + " must be String constant, but get " + arguments[1].type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + if constexpr (std::is_same_v) + { + // FIXME: Should it be nullable for invalid result? + // FIXME: set fraction for DataTypeMyDateTime + return makeNullable(std::make_shared()); + } + else if constexpr (std::is_same_v) + { + // FIXME: Should it be nullable for invalid result? + return makeNullable(std::make_shared()); + } + else + { + throw Exception("Unknown name for FunctionStrToDate:" + getName(), ErrorCodes::LOGICAL_ERROR); + } + } + + // FIXME: Should we override other method? 
+ bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + { + const auto & input_column = block.getByPosition(arguments[0]).column; + const size_t num_rows = input_column->size(); + const ColumnString * col_from = nullptr; + if (input_column->isColumnNullable()) + { + auto null_input_column = checkAndGetColumn(input_column.get()); + col_from = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); + } + else + { + col_from = checkAndGetColumn(input_column.get()); + } + + auto datetime_column = ColumnVector::create(num_rows); + auto & datetime_res = datetime_column->getData(); + auto null_column = ColumnUInt8::create(num_rows); + auto & null_res = null_column->getData(); + + const auto & format_col = block.getByPosition(arguments[1]).column; + if (format_col->isColumnConst()) + { + const auto & col_const = checkAndGetColumnConst(format_col.get()); + auto format = col_const->getValue(); + + auto parser = MyDateTimeParser(format); + for (size_t i = 0; i < num_rows; i++) + { + if (input_column->isColumnNullable()) + { + null_res[i] = input_column->isNullAt(i); + continue; + } + + const auto str_ref = col_from->getDataAt(i); + if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) + { + datetime_res[i] = *parse_res; + null_res[i] = 0; + } + else + { + datetime_res[i] = 0; + null_res[i] = 1; + } + } + block.getByPosition(result).column = ColumnNullable::create(std::move(datetime_column), std::move(null_column)); + } + else + { + // TODO: the second argument could be a column, support it later. 
+ throw Exception("Second argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); + } + } }; diff --git a/tests/fullstack-test/expr/str_to_date.test b/tests/fullstack-test/expr/str_to_date.test new file mode 100644 index 00000000000..7bded2f0490 --- /dev/null +++ b/tests/fullstack-test/expr/str_to_date.test @@ -0,0 +1,67 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a char(64), suite int not null) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values ('00/00/0000', 1),('13/05/2019', 1),('0/0/2012',1),('abc', 1); +mysql> insert into test.t values ('31/May /2016 12: 34:56.1234', 2),('30/Apr/2016 12:34:56.', 2),('30/Apr/2016 12:34:56.9', 2); +mysql> insert into test.t values ('31 /May/2016 12: 34:56.', 3),('30/Apr/2016 12:34:56', 3); +mysql> insert into test.t values ('31/May/2016', 4),('30/ Apr/ 2016 ', 4),(' 1/Apr/2016 ', 4); + +func> wait_table test t + +# Note that we need to put `str_to_date` in group by to make sure it is pushed down + +## Test suite 1 - Allow zero day +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a ++------------+------------+-----+ +| a | date | cnt | ++------------+------------+-----+ +| 0/0/2012 | 2012-00-00 | 1 | +| 00/00/0000 | 0000-00-00 | 2 | +| 13/05/2019 | 2019-05-13 | 1 | ++------------+------------+-----+ + +## Test suite 1 - Disallow zero day +#mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, 
ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a +mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a ++------------+------------+-----+ +| a | date | cnt | ++------------+------------+-----+ +| 0/0/2012 | NULL | 1 | +| 00/00/0000 | NULL | 2 | +| 13/05/2019 | 2019-05-13 | 1 | ++------------+------------+-----+ + +## Test suite 2 - showing datetime with fractions +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select /*+ agg_to_cop() */ a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by date order by a ++-----------------------------+----------------------------+ +| a | date | ++-----------------------------+----------------------------+ +| 30/Apr/2016 12:34:56. 
| 2016-04-30 12:34:56.000000 | +| 30/Apr/2016 12:34:56.9 | 2016-04-30 12:34:56.900000 | +| 31/May /2016 12: 34:56.1234 | 2016-05-31 12:34:56.123400 | ++-----------------------------+----------------------------+ + +## Test suite 3 - showing datetime without fractions +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by date order by a ++-------------------------+---------------------+ +| a | date | ++-------------------------+---------------------+ +| 30/Apr/2016 12:34:56 | 2016-04-30 12:34:56 | +| 31 /May/2016 12: 34:56. | 2016-05-31 12:34:56 | ++-------------------------+---------------------+ + +## Test suite 4 - showing date +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by date order by a ++---------------+------------+ +| a | date | ++---------------+------------+ +| 1/Apr/2016 | 2016-04-01 | +| 30/ Apr/ 2016 | 2016-04-30 | +| 31/May/2016 | 2016-05-31 | ++---------------+------------+ + +mysql> drop table if exists test.t From 0f70f26a88f2d0ba16f9a73ab097a071aabce014 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Sun, 23 May 2021 21:13:40 +0800 Subject: [PATCH 2/5] Confirm test pass Signed-off-by: JaySon-Huang --- dbms/src/Common/MyTime.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index cfa3c314b27..57292e72fa5 100644 
--- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1364,15 +1364,16 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm if (state = skipWhitespaces(); state != ParseState::NORMAL) return state; + // "AM"/"PM" must be parsed as a single element + // "11:13:56a" is an invalid input for "%r". + if (auto size_to_end = ctx.view.size - temp_pos; size_to_end < 2) + return ParseState::FAIL; if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'a') meridiem = 1; else if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'p') meridiem = 2; - temp_pos += 1; // move forward - if (state = checkIfEnd(); state != ParseState::NORMAL) - return state; - if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) != 'm') + if (toLowerIfAlphaASCII(ctx.view.data[temp_pos + 1]) != 'm') meridiem = 0; switch (meridiem) { @@ -1384,7 +1385,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.hour += 12; break; } - temp_pos += 1; // move forward + temp_pos += 2; // move forward return ParseState::NORMAL; }; if (auto state = tryParse(); state == ParseState::FAIL) From ab8b63724ecc0a76d91bc76bf2febe3e83564b31 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Mon, 24 May 2021 18:57:12 +0800 Subject: [PATCH 3/5] Fix bug for str_to_date nullable input column Remove unreasonable include Signed-off-by: JaySon-Huang --- dbms/src/Flash/Coprocessor/TiDBBit.h | 1 - dbms/src/Flash/Coprocessor/TiDBEnum.h | 1 - dbms/src/Functions/FunctionsComparison.h | 2 -- dbms/src/Functions/FunctionsConversion.h | 9 +++++-- .../Storages/DeltaMerge/DeltaMergeHelpers.cpp | 3 ++- .../Storages/DeltaMerge/DeltaMergeHelpers.h | 2 -- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 1 + .../Storages/DeltaMerge/Index/MinMaxIndex.cpp | 2 ++ dbms/src/Storages/DeltaMerge/RowKeyRange.h | 1 + .../DeltaMerge/convertColumnTypeHelpers.cpp | 6 +++++ tests/fullstack-test/expr/str_to_date.test | 27 ++++++++++--------- 11 
files changed, 34 insertions(+), 21 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/TiDBBit.h b/dbms/src/Flash/Coprocessor/TiDBBit.h index 442198e80da..5c1684a7068 100644 --- a/dbms/src/Flash/Coprocessor/TiDBBit.h +++ b/dbms/src/Flash/Coprocessor/TiDBBit.h @@ -5,7 +5,6 @@ #include #pragma GCC diagnostic pop -#include #include #include #include diff --git a/dbms/src/Flash/Coprocessor/TiDBEnum.h b/dbms/src/Flash/Coprocessor/TiDBEnum.h index 4c1679eba2c..71b98315fe4 100644 --- a/dbms/src/Flash/Coprocessor/TiDBEnum.h +++ b/dbms/src/Flash/Coprocessor/TiDBEnum.h @@ -5,7 +5,6 @@ #include #pragma GCC diagnostic pop -#include #include #include diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 3ec28334953..8f7846ce324 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -7,8 +7,6 @@ #include #include -#include - #include #include #include diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index e09f7b93280..83ec5f1c08b 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1676,8 +1676,13 @@ class FunctionStrToDate : public IFunction { if (input_column->isColumnNullable()) { - null_res[i] = input_column->isNullAt(i); - continue; + // For null input, just set the result as null + if (bool is_null = input_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing } const auto str_ref = col_from->getDataAt(i); diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp index 5b1cb728942..ed1ab3de3b0 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB @@ -101,4 +102,4 @@ void appendIntoHandleColumn(ColumnVector::Container & handle_column, con } } // namespace 
DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index d5ece791928..e7b3a9930bb 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index e40c833b17e..7829c211050 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 18089f84ac6..c6d2f4922a2 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index c82725100df..09ce340d9af 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB::DM { diff --git a/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp b/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp index b458fa79464..036794caf1c 100644 --- a/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp +++ b/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + namespace DM { diff --git a/tests/fullstack-test/expr/str_to_date.test 
b/tests/fullstack-test/expr/str_to_date.test index 7bded2f0490..f9baeb89c27 100644 --- a/tests/fullstack-test/expr/str_to_date.test +++ b/tests/fullstack-test/expr/str_to_date.test @@ -11,30 +11,33 @@ func> wait_table test t # Note that we need to put `str_to_date` in group by to make sure it is pushed down ## Test suite 1 - Allow zero day -#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a -mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +------------+------------+-----+ | a | date | cnt | +------------+------------+-----+ | 0/0/2012 | 2012-00-00 | 1 | -| 00/00/0000 | 0000-00-00 | 2 | +| 00/00/0000 | 0000-00-00 | 1 | | 13/05/2019 | 2019-05-13 | 1 | +| abc | 0000-00-00 | 1 | +------------+------------+-----+ ## Test suite 1 - Disallow zero day -#mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a -mysql> set 
sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('0000/00/00', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by date order by a +# The sql_mode does not effect the result set +#mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +------------+------------+-----+ | a | date | cnt | +------------+------------+-----+ | 0/0/2012 | NULL | 1 | -| 00/00/0000 | NULL | 2 | +| 00/00/0000 | NULL | 1 | | 13/05/2019 | 2019-05-13 | 1 | +| abc | NULL | 1 | +------------+------------+-----+ ## Test suite 2 - showing datetime with fractions -#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by date order by a -mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select /*+ agg_to_cop() */ a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by date order by a +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 
2 group by a,date order by a +-----------------------------+----------------------------+ | a | date | +-----------------------------+----------------------------+ @@ -44,8 +47,8 @@ mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='t +-----------------------------+----------------------------+ ## Test suite 3 - showing datetime without fractions -#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by date order by a -mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by date order by a +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by a,date order by a +-------------------------+---------------------+ | a | date | +-------------------------+---------------------+ @@ -54,8 +57,8 @@ mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='t +-------------------------+---------------------+ ## Test suite 4 - showing date -#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by date order by a -mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by date order by a +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, 
'%d/%b/%Y ') as date from test.t where suite = 4 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by a,date order by a +---------------+------------+ | a | date | +---------------+------------+ From 229f388cd9ab090af276ca99063a75ef59f5404d Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 25 May 2021 15:04:35 +0800 Subject: [PATCH 4/5] Support calling strToDateDate on two column Signed-off-by: JaySon-Huang --- dbms/src/Common/MyTime.cpp | 2 +- dbms/src/Common/MyTime.h | 2 +- dbms/src/Functions/FunctionsConversion.h | 83 ++++++++++++++++++------ 3 files changed, 65 insertions(+), 22 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 57292e72fa5..9727c163741 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1471,7 +1471,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) } // Refer: https://github.com/pingcap/tidb/blob/v5.0.1/types/time.go#L2946 -MyDateTimeParser::MyDateTimeParser(const String & format_) : format(format_) +MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) { // Ignore all prefix white spaces (TODO: handle unicode space?) 
size_t format_pos = 0; diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 7543636176b..e337013f45f 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -136,7 +136,7 @@ struct MyDateTimeFormatter struct MyDateTimeParser { - explicit MyDateTimeParser(const String & format_); + explicit MyDateTimeParser(String format_); std::optional parseAsPackedUInt(const StringRef & str_view) const; diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 83ec5f1c08b..8a0c88c9ffb 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1619,20 +1619,20 @@ class FunctionStrToDate : public IFunction if (!removeNullable(arguments[0].type)->isString()) throw Exception("First argument for function " + getName() + " must be String, but get " + arguments[0].type->getName(), ErrorCodes::ILLEGAL_COLUMN); - if (!arguments[1].type->isString()) + if (!removeNullable(arguments[1].type)->isString()) throw Exception( - "Second argument for function " + getName() + " must be String constant, but get " + arguments[1].type->getName(), + "Second argument for function " + getName() + " must be String, but get " + arguments[1].type->getName(), ErrorCodes::ILLEGAL_COLUMN); if constexpr (std::is_same_v) { - // FIXME: Should it be nullable for invalid result? + // Return null for invalid result // FIXME: set fraction for DataTypeMyDateTime return makeNullable(std::make_shared()); } else if constexpr (std::is_same_v) { - // FIXME: Should it be nullable for invalid result? + // Return null for invalid result return makeNullable(std::make_shared()); } else @@ -1643,21 +1643,20 @@ class FunctionStrToDate : public IFunction // FIXME: Should we override other method? 
bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto & input_column = block.getByPosition(arguments[0]).column; const size_t num_rows = input_column->size(); - const ColumnString * col_from = nullptr; + const ColumnString * input_raw_col = nullptr; if (input_column->isColumnNullable()) { auto null_input_column = checkAndGetColumn(input_column.get()); - col_from = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); + input_raw_col = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); } else { - col_from = checkAndGetColumn(input_column.get()); + input_raw_col = checkAndGetColumn(input_column.get()); } auto datetime_column = ColumnVector::create(num_rows); @@ -1665,14 +1664,14 @@ class FunctionStrToDate : public IFunction auto null_column = ColumnUInt8::create(num_rows); auto & null_res = null_column->getData(); - const auto & format_col = block.getByPosition(arguments[1]).column; - if (format_col->isColumnConst()) + const auto & format_column = block.getByPosition(arguments[1]).column; + if (format_column->isColumnConst()) { - const auto & col_const = checkAndGetColumnConst(format_col.get()); + // Precompile format parser + const auto & col_const = checkAndGetColumnConst(format_column.get()); auto format = col_const->getValue(); - auto parser = MyDateTimeParser(format); - for (size_t i = 0; i < num_rows; i++) + for (size_t i = 0; i < num_rows; ++i) { if (input_column->isColumnNullable()) { @@ -1684,8 +1683,7 @@ class FunctionStrToDate : public IFunction } // else fallthrough to parsing } - - const auto str_ref = col_from->getDataAt(i); + const auto str_ref = input_raw_col->getDataAt(i); if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) { datetime_res[i] = *parse_res; @@ -1697,13 +1695,58 @@ class 
FunctionStrToDate : public IFunction null_res[i] = 1; } } - block.getByPosition(result).column = ColumnNullable::create(std::move(datetime_column), std::move(null_column)); - } + } // end of format_column->isColumnConst() else { - // TODO: the second argument could be a column, support it later. - throw Exception("Second argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); - } + const ColumnString * format_raw_col = nullptr; + if (format_column->isColumnNullable()) + { + auto null_format_column = checkAndGetColumn(format_column.get()); + format_raw_col = checkAndGetColumn(null_format_column->getNestedColumnPtr().get()); + } + else + { + format_raw_col = checkAndGetColumn(format_column.get()); + } + + for (size_t i = 0; i < num_rows; ++i) + { + // Set null for either null input or null format + if (input_column->isColumnNullable()) + { + if (bool is_null = input_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing + } + if (format_column->isColumnNullable()) + { + if (bool is_null = format_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing + } + + const auto format_ref = format_raw_col->getDataAt(i); + auto parser = MyDateTimeParser(format_ref.toString()); + const auto str_ref = input_raw_col->getDataAt(i); + if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) + { + datetime_res[i] = *parse_res; + null_res[i] = 0; + } + else + { + datetime_res[i] = 0; + null_res[i] = 1; + } + } + } // end of !format_column->isColumnConst() + block.getByPosition(result).column = ColumnNullable::create(std::move(datetime_column), std::move(null_column)); } }; From 4893a340e18f1276d3206f597a3871b1fdfdfd80 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Tue, 25 May 2021 18:10:16 +0800 Subject: [PATCH 5/5] Port test cases from mysql; Fix bug for parsing "03-01-02 8:11:2.123456" with format "%Y-%m-%d %H:%i:%S.%#" 
Signed-off-by: JaySon-Huang --- dbms/src/Common/MyTime.cpp | 24 +++---- dbms/src/Common/tests/gtest_mytime.cpp | 90 +++++++++++++++++++++----- 2 files changed, 82 insertions(+), 32 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 9727c163741..deb90252135 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1267,21 +1267,15 @@ static std::tuple parseNDigits(const StringRef & view, const si static std::tuple parseYearNDigits(const StringRef & view, const size_t pos, const size_t limit) { - int32_t effective_count = 0; - int32_t effective_value = 0; - while (static_cast(effective_count + 1) <= limit) - { - auto [step, num] = parseNDigits(view, pos, effective_count + 1); - if (step == 0) - break; - effective_count++; - effective_value = num; - } - if (effective_count == 0) - return std::make_tuple(effective_count, 0); - else if (effective_count <= 2) - effective_value = adjustYear(effective_value); - return std::make_tuple(effective_count, effective_value); + // Try to parse a "year" within `limit` digits + size_t step = 0; + int32_t year = 0; + std::tie(step, year) = parseNDigits(view, pos, limit); + if (step == 0) + return std::make_tuple(step, 0); + else if (step <= 2) + year = adjustYear(year); + return std::make_tuple(step, year); } enum class ParseState diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 3a363111f99..31647d0787c 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -230,7 +230,7 @@ try {" 2/Jun", "%d/%b/%Y", MyDateTime{0, 6, 2, 0, 0, 0, 0}}, // More patterns than input string {" liter", "lit era l", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // More patterns than input string // Test case for empty input - {" ", " ", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + {" ", " ", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // {" ", "%d/%b/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // Prefix white spaces should be ignored {" 
2/Jun/2019 ", "%d/%b/%Y", MyDateTime{2019, 6, 2, 0, 0, 0, 0}}, @@ -248,8 +248,8 @@ try MyDateTime{2016, 2, 30, 12, 34, 56, 123400}}, // Feb 30th (not exist in actual) is valid for parsing (in mariadb) {"31/April/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 31, 12, 34, 56, 0}}, // April 31th (not exist in actual) {"01,5,2013 9", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 900000}}, - {"01,52013", "%d,%c%Y", std::nullopt}, // %c will try to parse '52' as month and fail - {"01,5,2013", "%d,%c,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"01,52013", "%d,%c%Y", std::nullopt}, // %c will try to parse '52' as month and fail + {"01,5,2013", "%d,%c,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // {"01,5,2013 ", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, /// Test cases for AM/PM set @@ -280,9 +280,9 @@ try {"12: 13:56 AM 13/05/2019", "%r%d/%c/%Y", MyDateTime{2019, 5, 13, 0, 13, 56, 0}}, {"12:13 :56 pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, {"11:13: 56pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, - {"11:13:56a", "%r", std::nullopt}, // EOF while parsing "AM"/"PM" - {"11:13", "%r", MyDateTime{0, 0, 0, 11, 13, 0, 0}}, - {"11:", "%r", MyDateTime{0, 0, 0, 11, 0, 0, 0}}, + {"11:13:56a", "%r", std::nullopt}, // EOF while parsing "AM"/"PM" + {"11:13", "%r", MyDateTime{0, 0, 0, 11, 13, 0, 0}}, // + {"11:", "%r", MyDateTime{0, 0, 0, 11, 0, 0, 0}}, // {"12", "%r", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, /// Test cases for %T @@ -290,7 +290,7 @@ try {"23: 13:56 13/05/2019", "%T%d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, {"12:13 :56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, {"19:13: 56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 19, 13, 56, 0}}, - {"21:13", "%T", MyDateTime{0, 0, 0, 21, 13, 0, 0}}, + {"21:13", "%T", MyDateTime{0, 0, 0, 21, 13, 0, 0}}, // {"21:", "%T", MyDateTime{0, 0, 0, 21, 0, 0, 0}}, // mutiple chars between pattern @@ -301,16 +301,15 @@ try {"01/Feb 
/2016 ab cdefg 23: 45:54", "%d /%b/%Y abc defg %H:%i :%S", MyDateTime{2016, 2, 1, 23, 45, 54, 0}}, /// Cases collect from MySQL 8.0 document - {"01,5,2013", "%d,%m,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, - {"May 1, 2013", "%M %d,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, - {"a09:30:17", "a%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, - {"a09:30:17", "%h:%i:%s", std::nullopt}, - {"09:30:17a", "%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, - {"abc", "abc", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, - {"9", "%m", MyDateTime{0, 9, 0, 0, 0, 0, 0}}, - {"9", "%s", MyDateTime{0, 0, 0, 0, 0, 9, 0}}, - // Range checking on the parts of date values is as described in Section 11.2.2, “The DATE, DATETIME, and TIMESTAMP Types”. This means, for example, that “zero” dates or dates with part values of 0 are permitted unless the SQL mode is set to disallow such values. - {"00/00/0000", "%m/%d/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + {"01,5,2013", "%d,%m,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // + {"May 1, 2013", "%M %d,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // + {"a09:30:17", "a%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, // + {"a09:30:17", "%h:%i:%s", std::nullopt}, // + {"09:30:17a", "%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, // + {"abc", "abc", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // + {"9", "%m", MyDateTime{0, 9, 0, 0, 0, 0, 0}}, // + {"9", "%s", MyDateTime{0, 0, 0, 0, 0, 9, 0}}, // + {"00/00/0000", "%m/%d/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // {"04/31/2004", "%m/%d/%Y", MyDateTime{2004, 4, 31, 0, 0, 0, 0}}, /// Below cases are ported from TiDB @@ -337,6 +336,63 @@ try {"abcd-2020-10-10.1", "%@-%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, {"2020-10-10", "%Y-%m-%d%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, {"2020-10-10abcde123abcdef", "%Y-%m-%d%@%#", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + + /// Cases ported from mysql testing by executing following SQL in mysql + /// create table t1 (date char(30) COLLATE latin1_bin, format char(30) COLLATE latin1_bin not 
null); + /// insert into t1 values (...),... + /// select if( str_to_date is not null, concat( '{"', date, '", "', format, '", ', concat( "MyDateTime{", year(str_to_date), date_format(str_to_date, ",%c,%e,"), hour(str_to_date), ",", minute(str_to_date), ",", second(str_to_date), ",", MICROSECOND(str_to_date), "}" ), '}, //' ), concat( '{"', date, '", "', format, '", std::nullopt}, //' )) as s from ( select date, format, str_to_date(date, format) as str_to_date from t1 ) a group by date, format, str_to_date order by date; + {"0003-01-02 8:11:2.123456", "%Y-%m-%d %H:%i:%S.%#", MyDateTime{3, 1, 2, 8, 11, 2, 0}}, // + {"03-01-02 8:11:2.123456", "%Y-%m-%d %H:%i:%S.%#", MyDateTime{2003, 1, 2, 8, 11, 2, 0}}, // + {"03-01-02 8:11:2.123456", "%y-%m-%d %H:%i:%S.%#", MyDateTime{2003, 1, 2, 8, 11, 2, 0}}, // + {"10:20:10", "%H:%i:%s", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10", "%T", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10", "%h:%i:%s.%f", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10.44AM", "%h:%i:%s.%f%p", MyDateTime{0, 0, 0, 10, 20, 10, 440000}}, // + {"10:20:10AM", "%h:%i:%s%p", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10AM", "%r", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"15 MAY 2001", "%d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"15 SEPTEMB 2001", "%d %M %Y", MyDateTime{2001, 9, 15, 0, 0, 0, 0}}, // The SEPTEMB is a broken string of 'SEPTEMBER', ignore this case + {"15 September 2001", "%d %M %Y", MyDateTime{2001, 9, 15, 0, 0, 0, 0}}, // + {"15-01-20", "%d-%m-%y", MyDateTime{2020, 1, 15, 0, 0, 0, 0}}, // + {"15-01-2001", "%d-%m-%Y %H:%i:%S", MyDateTime{2001, 1, 15, 0, 0, 0, 0}}, // + {"15-01-2001 12:59:58", "%d-%m-%Y %H:%i:%S", MyDateTime{2001, 1, 15, 12, 59, 58, 0}}, // + {"15-2001-1", "%d-%Y-%c", MyDateTime{2001, 1, 15, 0, 0, 0, 0}}, // + {"2003-01-02 01:11:12.12345AM", "%Y-%m-%d %h:%i:%S.%f%p", MyDateTime{2003, 1, 2, 1, 11, 12, 123450}}, // + {"2003-01-02 02:11:12.12345AM", "%Y-%m-%d %h:%i:%S.%f %p", 
MyDateTime{2003, 1, 2, 2, 11, 12, 123450}}, // + {"2003-01-02 10:11:12", "%Y-%m-%d %H:%i:%S", MyDateTime{2003, 1, 2, 10, 11, 12, 0}}, // + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %h:%i:%S %p", MyDateTime{2003, 1, 2, 22, 11, 12, 0}}, // + {"2003-01-02 11:11:12Pm", "%Y-%m-%d %h:%i:%S%p", MyDateTime{2003, 1, 2, 23, 11, 12, 0}}, // + {"2003-01-02 12:11:12.12345 am", "%Y-%m-%d %h:%i:%S.%f%p", MyDateTime{2003, 1, 2, 0, 11, 12, 123450}}, // + // some cases that are not implemented + // {"060 2004", "%j %Y", MyDateTime{2004, 2, 29, 0, 0, 0, 0}}, // + // {"15th May 2001", "%D %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"4 53 1998", "%w %u %Y", MyDateTime{1998, 12, 31, 0, 0, 0, 0}}, // + // {"Sund 15 MAY 2001", "%W %d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"Sunday 01 2001", "%W %v %x", MyDateTime{2001, 1, 7, 0, 0, 0, 0}}, // + // {"Sunday 15 MAY 2001", "%W %d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"Thursday 53 1998", "%W %u %Y", MyDateTime{1998, 12, 31, 0, 0, 0, 0}}, // + // {"Tuesday 00 2002", "%W %U %Y", MyDateTime{2002, 1, 1, 0, 0, 0, 0}}, // + // {"Tuesday 52 2001", "%W %V %X", MyDateTime{2002, 1, 1, 0, 0, 0, 0}}, // + // Test 'maybe' date formats and 'strange but correct' results + {"03-01-02 10:11:12 PM", "%Y-%m-%d %h:%i:%S %p", MyDateTime{2003, 1, 2, 22, 11, 12, 0}}, // + {"10:20:10AM", "%h:%i:%s", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"2003-01-02 10:11:12", "%Y-%m-%d %h:%i:%S", MyDateTime{2003, 1, 2, 10, 11, 12, 0}}, // + // Test wrong dates or conversion specifiers + {"10:20:10AM", "%H:%i:%s%p", std::nullopt}, // + {"15 Ju 2001", "%d %M %Y", std::nullopt}, // + {"15 Septembei 2001", "%d %M %Y", std::nullopt}, // + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %H:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12 PM", "%y-%m-%d %H:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12.123456", "%Y-%m-%d %h:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12AM", "%Y-%m-%d %h:%i:%S.%f %p", std::nullopt}, // + {"2003-01-02 
10:11:12AN", "%Y-%m-%d %h:%i:%S%p", std::nullopt}, // + // {"7 53 1998", "%w %u %Y", std::nullopt}, // + // {"Sund 15 MA", "%W %d %b %Y", std::nullopt}, // + // {"Sunday 01 2001", "%W %v %X", std::nullopt}, // + // {"Thursdai 12 1998", "%W %u %Y", std::nullopt}, // + // {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, // + // {"Tuesday 52 2001", "%W %V %x", std::nullopt}, // + // {"Tuesday 52 2001", "%W %u %x", std::nullopt}, // }; auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f"); size_t idx = 0;