diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index b249d2db136..deb90252135 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1,6 +1,10 @@ #include +#include +#include #include #include +#include +#include #include #include @@ -9,7 +13,15 @@ namespace DB { -int adjustYear(int year) +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int NOT_IMPLEMENTED; +} // namespace ErrorCodes + +// adjustYear adjusts year according to y. +// See https://dev.mysql.com/doc/refman/5.7/en/two-digit-years.html +int32_t adjustYear(int32_t year) { if (year >= 0 && year <= 69) return 2000 + year; @@ -1217,4 +1229,710 @@ MyDateTimeFormatter::MyDateTimeFormatter(const String & layout) } } +struct MyDateTimeParser::Context +{ + // Some state for `mysqlTimeFix` + uint32_t state = 0; + static constexpr uint32_t ST_DAY_OF_YEAR = 0x01; + static constexpr uint32_t ST_MERIDIEM = 0x02; + static constexpr uint32_t ST_HOUR_0_23 = 0x04; + static constexpr uint32_t ST_HOUR_1_12 = 0x08; + + int32_t day_of_year = 0; + // 0 - invalid, 1 - am, 2 - pm + int32_t meridiem = 0; + + // The input string view + const StringRef view; + // The pos we are parsing from + size_t pos = 0; + + Context(StringRef view_) : view(std::move(view_)) {} +}; + +// Try to parse digits with number of `limit` starting from view[pos] +// Return if success. +// Return <0, _> if fail. 
+static std::tuple parseNDigits(const StringRef & view, const size_t pos, const size_t limit) +{ + size_t step = 0; + int32_t num = 0; + while (step < limit && (pos + step) < view.size && isNumericASCII(view.data[pos + step])) + { + num = num * 10 + (view.data[pos + step] - '0'); + step += 1; + } + return std::make_tuple(step, num); +} + +static std::tuple parseYearNDigits(const StringRef & view, const size_t pos, const size_t limit) +{ + // Try to parse a "year" within `limit` digits + size_t step = 0; + int32_t year = 0; + std::tie(step, year) = parseNDigits(view, pos, limit); + if (step == 0) + return std::make_tuple(step, 0); + else if (step <= 2) + year = adjustYear(year); + return std::make_tuple(step, year); +} + +enum class ParseState +{ + NORMAL = 0, // Parsing + FAIL = 1, // Fail to parse + END_OF_FILE = 2, // The end of input +}; + +//"%r": Time, 12-hour (hh:mm:ss followed by AM or PM) +static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) +{ + // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure + size_t temp_pos = ctx.pos; + auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + // To the end + if (temp_pos == ctx.view.size) + return ParseState::END_OF_FILE; + return ParseState::NORMAL; + }; + auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) + ++temp_pos; + return checkIfEnd(); + }; + auto parseSep = [&temp_pos, &ctx, &checkIfEnd, &skipWhitespaces]() -> ParseState { + if (skipWhitespaces() == ParseState::END_OF_FILE) + return ParseState::END_OF_FILE; + // parse ":" + if (ctx.view.data[temp_pos] != ':') + return ParseState::FAIL; + temp_pos += 1; // move forward + return ParseState::NORMAL; + }; + auto tryParse = [&]() -> ParseState { + ParseState state = ParseState::NORMAL; + /// Note that we should update `time` as soon as possible, or we + /// can not get correct result for incomplete input 
like "12:13" + /// that is less than "hh:mm:ssAM" + + // hh + size_t step = 0; + int32_t hour = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || hour > 12 || hour == 0) + return ParseState::FAIL; + // Handle special case: 12:34:56 AM -> 00:34:56 + // For PM, we will add 12 it later + if (hour == 12) + hour = 0; + time.hour = hour; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t minute = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || minute > 59) + return ParseState::FAIL; + time.minute = minute; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t second = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || second > 59) + return ParseState::FAIL; + time.second = second; + temp_pos += step; // move forward + + int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + // "AM"/"PM" must be parsed as a single element + // "11:13:56a" is an invalid input for "%r". 
+ if (auto size_to_end = ctx.view.size - temp_pos; size_to_end < 2) + return ParseState::FAIL; + if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'a') + meridiem = 1; + else if (toLowerIfAlphaASCII(ctx.view.data[temp_pos]) == 'p') + meridiem = 2; + + if (toLowerIfAlphaASCII(ctx.view.data[temp_pos + 1]) != 'm') + meridiem = 0; + switch (meridiem) + { + case 0: + return ParseState::FAIL; + case 1: + break; + case 2: + time.hour += 12; + break; + } + temp_pos += 2; // move forward + return ParseState::NORMAL; + }; + if (auto state = tryParse(); state == ParseState::FAIL) + return false; + // Other state, forward the `ctx.pos` and return true + ctx.pos = temp_pos; + return true; +} + +//"%T": Time, 24-hour (hh:mm:ss) +static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) +{ + // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure + size_t temp_pos = ctx.pos; + auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + // To the end + if (temp_pos == ctx.view.size) + return ParseState::END_OF_FILE; + return ParseState::NORMAL; + }; + auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) + ++temp_pos; + return checkIfEnd(); + }; + auto parseSep = [&temp_pos, &ctx, &checkIfEnd, &skipWhitespaces]() -> ParseState { + if (skipWhitespaces() == ParseState::END_OF_FILE) + return ParseState::END_OF_FILE; + // parse ":" + if (ctx.view.data[temp_pos] != ':') + return ParseState::FAIL; + temp_pos += 1; // move forward + return ParseState::NORMAL; + }; + auto tryParse = [&]() -> ParseState { + ParseState state = ParseState::NORMAL; + /// Note that we should update `time` as soon as possible, or we + /// can not get correct result for incomplete input like "12:13" + /// that is less than "hh:mm:ss" + + // hh + size_t step = 0; + int32_t hour = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + 
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || hour > 23) + return ParseState::FAIL; + time.hour = hour; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t minute = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || minute > 59) + return ParseState::FAIL; + time.minute = minute; + temp_pos += step; // move forward + + if (state = parseSep(); state != ParseState::NORMAL) + return state; + + int32_t second = 0; + if (state = skipWhitespaces(); state != ParseState::NORMAL) + return state; + std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2); + if (step == 0 || second > 59) + return ParseState::FAIL; + time.second = second; + temp_pos += step; // move forward + + return ParseState::NORMAL; + }; + if (auto state = tryParse(); state == ParseState::FAIL) + return false; + // Other state, forward the `ctx.pos` and return true + ctx.pos = temp_pos; + return true; +} + +// Refer: https://github.com/pingcap/tidb/blob/v5.0.1/types/time.go#L2946 +MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) +{ + // Ignore all prefix white spaces (TODO: handle unicode space?) 
+ size_t format_pos = 0; + while (format_pos < format.size() && isWhitespaceASCII(format[format_pos])) + format_pos++; + + bool in_pattern_match = false; + while (format_pos < format.size()) + { + char x = format[format_pos]; + if (in_pattern_match) + { + switch (x) + { + case 'b': + { + //"%b": Abbreviated month name (Jan..Dec) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + size_t step = 0; + auto v = removePrefix(ctx.view, ctx.pos); + for (size_t p = 0; p < 12; p++) + { + if (startsWithCI(v, abbrev_month_names[p])) + { + time.month = p + 1; + step = abbrev_month_names[p].size(); + break; + } + } + if (step == 0) + return false; + ctx.pos += step; + return true; + }); + break; + } + case 'c': + { + //"%c": Month, numeric (0..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + // To be compatible with TiDB & MySQL, first try to take two digit and parse it as `num` + auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); + // Then check whether num is valid month + // Note that 0 is valid when sql_mode does not contain NO_ZERO_IN_DATE,NO_ZERO_DATE + if (step == 0 || month > 12) + return false; + time.month = month; + ctx.pos += step; + return true; + }); + break; + } + case 'd': //"%d": Day of the month, numeric (00..31) + [[fallthrough]]; + case 'e': //"%e": Day of the month, numeric (0..31) + { + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, day] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || day > 31) + return false; + time.day = day; + ctx.pos += step; + return true; + }); + break; + } + case 'f': + { + //"%f": Microseconds (000000..999999) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, ms] = parseNDigits(ctx.view, ctx.pos, 6); + // Empty string is a valid input + if (step == 0) + { + time.micro_second = 0; + return true; + } + // The siffix '0' can be 
ignored. + // "9" means 900000 + while (ms > 0 && ms * 10 < 1000000) + { + ms *= 10; + } + time.micro_second = ms; + ctx.pos += step; + return true; + }); + break; + } + case 'k': + //"%k": Hour (0..23) + [[fallthrough]]; + case 'H': + { + //"%H": Hour (00..23) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, hour] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || hour > 23) + return false; + ctx.state |= MyDateTimeParser::Context::ST_HOUR_0_23; + time.hour = hour; + ctx.pos += step; + return true; + }); + break; + } + case 'l': + //"%l": Hour (1..12) + [[fallthrough]]; + case 'I': + //"%I": Hour (01..12) + [[fallthrough]]; + case 'h': + { + //"%h": Hour (01..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, hour] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || hour <= 0 || hour > 12) + return false; + ctx.state |= MyDateTimeParser::Context::ST_HOUR_1_12; + time.hour = hour; + ctx.pos += step; + return true; + }); + break; + } + case 'i': + { + //"%i": Minutes, numeric (00..59) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, num] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || num > 59) + return false; + time.minute = num; + ctx.pos += step; + return true; + }); + break; + } + case 'j': + { + //"%j": Day of year (001..366) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + auto [step, num] = parseNDigits(ctx.view, ctx.pos, 3); + if (step == 0 || num == 0 || num > 366) + return false; + ctx.state |= MyDateTimeParser::Context::ST_DAY_OF_YEAR; + ctx.day_of_year = num; + ctx.pos += step; + return true; + }); + break; + } + case 'M': + { + //"%M": Month name (January..December) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto v = removePrefix(ctx.view, ctx.pos); + size_t step = 0; + for (size_t p = 0; p < 
12; p++) + { + if (startsWithCI(v, month_names[p])) + { + time.month = p + 1; + step = month_names[p].size(); + break; + } + } + if (step == 0) + return false; + ctx.pos += step; + return true; + }); + break; + } + case 'm': + { + //"%m": Month, numeric (00..12) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || month > 12) + return false; + time.month = month; + ctx.pos += step; + return true; + }); + break; + } + case 'S': + //"%S": Seconds (00..59) + [[fallthrough]]; + case 's': + { + //"%s": Seconds (00..59) + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, second] = parseNDigits(ctx.view, ctx.pos, 2); + if (step == 0 || second > 59) + return false; + time.second = second; + ctx.pos += step; + return true; + }); + break; + } + case 'p': + { + //"%p": AM or PM + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // Check the offset that will visit + if (ctx.view.size - ctx.pos < 2) + return false; + + int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm + if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos]) == 'a') + meridiem = 1; + else if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos]) == 'p') + meridiem = 2; + + if (toLowerIfAlphaASCII(ctx.view.data[ctx.pos + 1]) != 'm') + meridiem = 0; + + if (meridiem == 0) + return false; + + ctx.state |= MyDateTimeParser::Context::ST_MERIDIEM; + ctx.meridiem = meridiem; + ctx.pos += 2; + return true; + }); + break; + } + case 'r': + { + //"%r": Time, 12-hour (hh:mm:ss followed by AM or PM) + parsers.emplace_back(parseTime12Hour); + break; + } + case 'T': + { + //"%T": Time, 24-hour (hh:mm:ss) + parsers.emplace_back(parseTime24Hour); + break; + } + case 'Y': + { + //"%Y": Year, numeric, four digits + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, year] = parseYearNDigits(ctx.view, 
ctx.pos, 4); + if (step == 0) + return false; + time.year = year; + ctx.pos += step; + return true; + }); + break; + } + case 'y': + { + //"%y": Year, numeric, two digits. Deprecated since MySQL 5.7.5 + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { + auto [step, year] = parseYearNDigits(ctx.view, ctx.pos, 2); + if (step == 0) + return false; + time.year = year; + ctx.pos += step; + return true; + }); + break; + } + case '#': + { + //"%#": Skip all numbers + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII numeric the same with unicode numeric? + size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isNumericASCII(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '.': + { + //"%.": Skip all punctation characters + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII punctuation the same with unicode punctuation? + size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isPunctuation(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '@': + { + //"%@": Skip all alpha characters + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { + // TODO: Does ASCII alpha the same with unicode alpha? 
+ size_t temp_pos = ctx.pos; + while (temp_pos < ctx.view.size && isAlphaASCII(ctx.view.data[temp_pos])) + temp_pos++; + ctx.pos = temp_pos; + return true; + }); + break; + } + case '%': + { + //"%%": A literal % character + parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase &) -> bool { +#if 0 + if (ctx.view.data[ctx.pos] != '%') + return false; + ctx.pos++; + return true; +#else + // FIXME: Ignored by now, both tidb 5.0.0 and mariadb 10.3.14 can not handle it + std::ignore = ctx; + return false; +#endif + }); + break; + } + default: + throw Exception( + "Unknown date format pattern, [format=" + format + "] [pattern=" + x + "] [pos=" + DB::toString(format_pos) + "]", + ErrorCodes::BAD_ARGUMENTS); + } + // end the state of pattern match + in_pattern_match = false; + // move format_pos forward + format_pos++; + continue; + } + + if (x == '%') + { + in_pattern_match = true; + // move format_pos forward + format_pos++; + } + else + { + // Ignore whitespace for literal forwarding (TODO: handle unicode space?) 
+ while (format_pos < format.size() && isWhitespaceASCII(format[format_pos])) + format_pos++; + // Move forward ctx.view with a sequence of literal `format[format_pos:span_end]` + size_t span_end = format_pos; + while (span_end < format.size() && format[span_end] != '%' && !isWhitespaceASCII(format[span_end])) + ++span_end; + const size_t span_size = span_end - format_pos; + if (span_size > 0) + { + StringRef format_view{format.data() + format_pos, span_size}; + parsers.emplace_back([format_view](MyDateTimeParser::Context & ctx, MyTimeBase &) { + assert(format_view.size > 0); + if (format_view.size == 1) + { + // Shortcut for only 1 char + if (ctx.view.data[ctx.pos] != format_view.data[0]) + return false; + ctx.pos += 1; + return true; + } + // Try best to match input as most literal as possible + auto v = removePrefix(ctx.view, ctx.pos); + size_t v_step = 0; + for (size_t format_step = 0; format_step < format_view.size; ++format_step) + { + // Ignore prefix whitespace for input + while (v_step < v.size && isWhitespaceASCII(v.data[v_step])) + ++v_step; + if (v_step == v.size) // To the end + break; + // Try to match literal + if (v.data[v_step] != format_view.data[format_step]) + return false; + ++v_step; + } + ctx.pos += v_step; + return true; + }); + } + // move format_pos forward + format_pos = span_end; + } + } +} + +bool mysqlTimeFix(const MyDateTimeParser::Context & ctx, MyTimeBase & my_time) +{ + // TODO: Implement the function that converts day of year to yy:mm:dd + if (ctx.state & MyDateTimeParser::Context::ST_DAY_OF_YEAR) + { + // %j Day of year (001..366) set + throw Exception("%j set, parsing day of year is not implemented", ErrorCodes::NOT_IMPLEMENTED); + } + + if (ctx.state & MyDateTimeParser::Context::ST_MERIDIEM) + { + // %H (00..23) set, should not set AM/PM + if (ctx.state & MyDateTimeParser::Context::ST_HOUR_0_23) + return false; + if (my_time.hour == 0) + return false; + if (my_time.hour == 12) + { + // 12 is a special hour. 
+ if (ctx.meridiem == 1) // AM + my_time.hour = 0; + else if (ctx.meridiem == 2) // PM + my_time.hour = 12; + return true; + } + if (ctx.meridiem == 2) // PM + my_time.hour += 12; + } + else + { + // %h (01..12) set + if ((ctx.state & MyDateTimeParser::Context::ST_HOUR_1_12) && my_time.hour == 12) + my_time.hour = 0; // why? + } + return true; +} + +std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_view) const +{ + MyTimeBase my_time{0, 0, 0, 0, 0, 0, 0}; + MyDateTimeParser::Context ctx(str_view); + + // TODO: can we return warnings to TiDB? + for (auto & f : parsers) + { + // Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?) + while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos])) + ctx.pos++; + // To the end of input, exit (successfully) even if there is more patterns to match + if (ctx.pos == ctx.view.size) + break; + + if (f(ctx, my_time) != true) + { +#ifndef NDEBUG + LOG_TRACE(&Logger::get("MyDateTimeParser"), + "parse error, [str=" << ctx.view.toString() << "] [format=" << format << "] [parse_pos=" << ctx.pos << "]"); +#endif + return std::nullopt; + } + + // `ctx.pos` > `ctx.view.size` after callback, must be something wrong + if (unlikely(ctx.pos > ctx.view.size)) + { + throw Exception(String(__PRETTY_FUNCTION__) + ": parse error, pos overflow. 
[str=" + ctx.view.toString() + "] [format=" + format + + "] [parse_pos=" + DB::toString(ctx.pos) + "] [size=" + DB::toString(ctx.view.size) + "]"); + } + } + // Extra characters at the end of date are ignored, but a warning should be reported at this case + // if (ctx.pos < ctx.view.size) {} + + // Handle the var in `ctx` + if (!mysqlTimeFix(ctx, my_time)) + return std::nullopt; + + return my_time.toPackedUInt(); +} + } // namespace DB diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 7dc8001b025..e337013f45f 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -3,6 +3,7 @@ #include #include +struct StringRef; namespace DB { @@ -133,6 +134,24 @@ struct MyDateTimeFormatter } }; +struct MyDateTimeParser +{ + explicit MyDateTimeParser(String format_); + + std::optional parseAsPackedUInt(const StringRef & str_view) const; + + struct Context; + +private: + const String format; + + // Parsing method. Parse from ctx.view[ctx.pos]. + // If success, update `datetime`, `ctx` and return true. + // If fail, return false. 
namespace detail
{

namespace
{

/// Lower-case a single byte with `std::tolower`.
/// The byte is converted to `unsigned char` first: passing a negative `char` value
/// (any byte >= 0x80 where `char` is signed) to `std::tolower` is undefined behaviour.
inline int lowerByte(char c)
{
    return std::tolower(static_cast<unsigned char>(c));
}

/// Case insensitive comparison of the first `n` bytes of `lhs` and `rhs`.
bool equalsCI(const char * lhs, const char * rhs, size_t n)
{
    for (size_t i = 0; i < n; ++i)
    {
        if (lowerByte(lhs[i]) != lowerByte(rhs[i]))
            return false;
    }
    return true;
}

} // namespace

/// Whether the `size` bytes at `s` begin with the `prefix_size` bytes at `prefix`.
bool startsWith(const char * s, size_t size, const char * prefix, size_t prefix_size)
{
    return size >= prefix_size && 0 == memcmp(s, prefix, prefix_size);
}

/// Whether the `size` bytes at `s` end with the `suffix_size` bytes at `suffix`.
bool endsWith(const char * s, size_t size, const char * suffix, size_t suffix_size)
{
    return size >= suffix_size && 0 == memcmp(s + size - suffix_size, suffix, suffix_size);
}

/// Case insensitive version of `startsWith`.
bool startsWithCI(const char * s, size_t size, const char * prefix, size_t prefix_size)
{
    return size >= prefix_size && equalsCI(s, prefix, prefix_size);
}

/// Case insensitive version of `endsWith`.
bool endsWithCI(const char * s, size_t size, const char * suffix, size_t suffix_size)
{
    // Compare against the LAST `suffix_size` bytes of `s`, not the first ones
    return size >= suffix_size && equalsCI(s + size - suffix_size, suffix, suffix_size);
}

} // namespace detail
inline bool startsWith(const std::string & s, const std::string & prefix) { - return detail::startsWith(s, prefix.data(), prefix.size()); + return detail::startsWith(s.data(), s.size(), prefix.data(), prefix.size()); } inline bool endsWith(const std::string & s, const std::string & suffix) { - return detail::endsWith(s, suffix.data(), suffix.size()); + return detail::endsWith(s.data(), s.size(), suffix.data(), suffix.size()); } @@ -27,12 +32,12 @@ inline bool endsWith(const std::string & s, const std::string & suffix) /// string that is known at compile time. inline bool startsWith(const std::string & s, const char * prefix) { - return detail::startsWith(s, prefix, strlen(prefix)); + return detail::startsWith(s.data(), s.size(), prefix, strlen(prefix)); } inline bool endsWith(const std::string & s, const char * suffix) { - return detail::endsWith(s, suffix, strlen(suffix)); + return detail::endsWith(s.data(), s.size(), suffix, strlen(suffix)); // } /// Given an integer, return the adequate suffix for diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 537e74fc522..31647d0787c 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include @@ -223,5 +223,220 @@ catch (Exception & e) GTEST_FAIL(); } +TEST_F(TestMyTime, Parser) +try +{ + std::vector>> cases{ + {" 2/Jun", "%d/%b/%Y", MyDateTime{0, 6, 2, 0, 0, 0, 0}}, // More patterns than input string + {" liter", "lit era l", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // More patterns than input string + // Test case for empty input + {" ", " ", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // + {" ", "%d/%b/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + // Prefix white spaces should be ignored + {" 2/Jun/2019 ", "%d/%b/%Y", MyDateTime{2019, 6, 2, 0, 0, 0, 0}}, + {" 2/Jun/2019 ", " %d/%b/%Y", MyDateTime{2019, 6, 2, 0, 0, 0, 0}}, + // + {"31/May/2016 12:34:56.1234", "%d/%b/%Y 
%H:%i:%S.%f", MyDateTime{2016, 5, 31, 12, 34, 56, 123400}}, + {"31/may/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", MyDateTime{2016, 5, 31, 12, 34, 56, 123400}}, // case insensitive + {"31/mayy/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", std::nullopt}, // invalid %b + {"31/mey/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", std::nullopt}, // invalid %b + {"30/April/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 30, 12, 34, 56, 0}}, // empty %f is valid + {"30/april/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 30, 12, 34, 56, 0}}, // case insensitive + {"30/Apri/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", std::nullopt}, // invalid %M + {"30/Aprill/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", std::nullopt}, // invalid %M + {"30/Feb/2016 12:34:56.1234", "%d/%b/%Y %H:%i:%S.%f", + MyDateTime{2016, 2, 30, 12, 34, 56, 123400}}, // Feb 30th (not exist in actual) is valid for parsing (in mariadb) + {"31/April/2016 12:34:56.", "%d/%M/%Y %H:%i:%s.%f", MyDateTime{2016, 4, 31, 12, 34, 56, 0}}, // April 31th (not exist in actual) + {"01,5,2013 9", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 900000}}, + {"01,52013", "%d,%c%Y", std::nullopt}, // %c will try to parse '52' as month and fail + {"01,5,2013", "%d,%c,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // + {"01,5,2013 ", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + + /// Test cases for AM/PM set + {"10:11:12 AM", "%H:%i:%S %p", std::nullopt}, // should not set %H %p at the same time + {"10:11:12 Am", "%h:%i:%S %p", MyDateTime(0, 0, 0, 10, 11, 12, 0)}, + {"10:11:12 A", "%h:%i:%S %p", std::nullopt}, // EOF while parsing "AM"/"PM" + {"00:11:12 AM", "%h:%i:%S %p", std::nullopt}, // should not happen: %p set, %h not set + {"11:12 AM", "%i:%S %p", std::nullopt}, // should not happen: %p set, %h not set + {"11:12 abcd", "%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}}, // without %p, %h not set is ok + {"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h + {"12:11:12 AP", "%h:%i:%S %p", 
std::nullopt}, // only AM/PM is valid + {"12:11:12 AM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 0, 11, 12, 0)}, + {"12:11:12 PM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 12, 11, 12, 0)}, + {"11:11:12 pM", "%h:%i:%S %p", MyDateTime(0, 0, 0, 23, 11, 12, 0)}, + /// Special case for %h with 12 + {"12:11:23 ", "%h:%i:%S ", MyDateTime(0, 0, 0, 0, 11, 23, 0)}, + // For %% -- FIXME: Ignored by now, both tidb and mariadb 10.3.14 can not handle it + // {"01/Feb/2016 % 23:45:54", "%d/%b/%Y %% %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + // {"01/Feb/2016 %% 23:45:54", "%d/%b/%Y %%%% %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb/2016 % 23:45:54", "%d/%b/%Y %% %H:%i:%S", std::nullopt}, + {"01/Feb/2016 %% 23:45:54", "%d/%b/%Y %%%% %H:%i:%S", std::nullopt}, + + /// Test cases for %r + {" 04 :13:56 AM13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 4, 13, 56, 0}}, + {"13:13:56 AM13/5/2019", "%r", std::nullopt}, // hh = 13 with am is invalid + {"00:13:56 AM13/05/2019", "%r", std::nullopt}, // hh = 0 with am is invalid + {"00:13:56 pM13/05/2019", "%r", std::nullopt}, // hh = 0 with pm is invalid + {"12: 13:56 AM 13/05/2019", "%r%d/%c/%Y", MyDateTime{2019, 5, 13, 0, 13, 56, 0}}, + {"12:13 :56 pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, + {"11:13: 56pm 13/05/2019", "%r %d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, + {"11:13:56a", "%r", std::nullopt}, // EOF while parsing "AM"/"PM" + {"11:13", "%r", MyDateTime{0, 0, 0, 11, 13, 0, 0}}, // + {"11:", "%r", MyDateTime{0, 0, 0, 11, 0, 0, 0}}, // + {"12", "%r", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, + + /// Test cases for %T + {" 4 :13:56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 4, 13, 56, 0}}, + {"23: 13:56 13/05/2019", "%T%d/%c/%Y", MyDateTime{2019, 5, 13, 23, 13, 56, 0}}, + {"12:13 :56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 12, 13, 56, 0}}, + {"19:13: 56 13/05/2019", "%T %d/%c/%Y", MyDateTime{2019, 5, 13, 19, 13, 56, 0}}, + {"21:13", "%T", MyDateTime{0, 0, 0, 21, 13, 0, 
0}}, // + {"21:", "%T", MyDateTime{0, 0, 0, 21, 0, 0, 0}}, + + // mutiple chars between pattern + {"01/Feb/2016 abcdefg 23:45:54", "%d/%b/%Y abcdefg %H:%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + // the number of whitespace between pattern and input doesn't matter + {"01/Feb/2016 abcdefg 23:45: 54", "%d/%b/%Y abcdefg %H :%i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb/ 2016 abc defg 23:45:54", "%d/ %b/%Y abcdefg %H: %i:%S", MyDateTime(2016, 2, 1, 23, 45, 54, 0)}, + {"01/Feb /2016 ab cdefg 23: 45:54", "%d /%b/%Y abc defg %H:%i :%S", MyDateTime{2016, 2, 1, 23, 45, 54, 0}}, + + /// Cases collect from MySQL 8.0 document + {"01,5,2013", "%d,%m,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // + {"May 1, 2013", "%M %d,%Y", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, // + {"a09:30:17", "a%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, // + {"a09:30:17", "%h:%i:%s", std::nullopt}, // + {"09:30:17a", "%h:%i:%s", MyDateTime{0, 0, 0, 9, 30, 17, 0}}, // + {"abc", "abc", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // + {"9", "%m", MyDateTime{0, 9, 0, 0, 0, 0, 0}}, // + {"9", "%s", MyDateTime{0, 0, 0, 0, 0, 9, 0}}, // + {"00/00/0000", "%m/%d/%Y", MyDateTime{0, 0, 0, 0, 0, 0, 0}}, // + {"04/31/2004", "%m/%d/%Y", MyDateTime{2004, 4, 31, 0, 0, 0, 0}}, + + /// Below cases are ported from TiDB + {"10/28/2011 9:46:29 pm", "%m/%d/%Y %l:%i:%s %p", MyDateTime(2011, 10, 28, 21, 46, 29, 0)}, + {"10/28/2011 9:46:29 Pm", "%m/%d/%Y %l:%i:%s %p", MyDateTime(2011, 10, 28, 21, 46, 29, 0)}, + {"2011/10/28 9:46:29 am", "%Y/%m/%d %l:%i:%s %p", MyDateTime(2011, 10, 28, 9, 46, 29, 0)}, + {"20161122165022", "%Y%m%d%H%i%s", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, + {"2016 11 22 16 50 22", "%Y%m%d%H%i%s", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, // fail, should ignore sep + {"16-50-22 2016 11 22", "%H-%i-%s%Y%m%d", MyDateTime(2016, 11, 22, 16, 50, 22, 0)}, // fail, should ignore sep + {"16-50 2016 11 22", "%H-%i-%s%Y%m%d", std::nullopt}, + {"15-01-2001 1:59:58.999", "%d-%m-%Y %I:%i:%s.%f", MyDateTime(2001, 
1, 15, 1, 59, 58, 999000)}, + {"15-01-2001 1:59:58.1", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 59, 58, 100000)}, + {"15-01-2001 1:59:58.", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 59, 58, 0)}, + {"15-01-2001 1:9:8.999", "%d-%m-%Y %H:%i:%s.%f", MyDateTime(2001, 1, 15, 1, 9, 8, 999000)}, + {"15-01-2001 1:9:8.999", "%d-%m-%Y %H:%i:%S.%f", MyDateTime(2001, 1, 15, 1, 9, 8, 999000)}, + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %H:%i:%S %p", std::nullopt}, // should not set %H %p at the same time + {"10:20:10AM", "%H:%i:%S%p", std::nullopt}, // should not set %H %p at the same time + // test %@(skip alpha), %#(skip number), %.(skip punct) + {"2020-10-10ABCD", "%Y-%m-%d%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-101234", "%Y-%m-%d%#", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10....", "%Y-%m-%d%.", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10.1", "%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"abcd2020-10-10.1", "%@%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"abcd-2020-10-10.1", "%@-%Y-%m-%d%.%#%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10", "%Y-%m-%d%@", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + {"2020-10-10abcde123abcdef", "%Y-%m-%d%@%#", MyDateTime(2020, 10, 10, 0, 0, 0, 0)}, + + /// Cases ported from mysql testing by executing following SQL in mysql + /// create table t1 (date char(30) COLLATE latin1_bin, format char(30) COLLATE latin1_bin not null); + /// insert into t1 values (...),... 
+ /// select if( str_to_date is not null, concat( '{"', date, '", "', format, '", ', concat( "MyDateTime{", year(str_to_date), date_format(str_to_date, ",%c,%e,"), hour(str_to_date), ",", minute(str_to_date), ",", second(str_to_date), ",", MICROSECOND(str_to_date), "}" ), '}, //' ), concat( '{"', date, '", "', format, '", std::nullopt}, //' )) as s from ( select date, format, str_to_date(date, format) as str_to_date from t1 ) a group by date, format, str_to_date order by date; + {"0003-01-02 8:11:2.123456", "%Y-%m-%d %H:%i:%S.%#", MyDateTime{3, 1, 2, 8, 11, 2, 0}}, // + {"03-01-02 8:11:2.123456", "%Y-%m-%d %H:%i:%S.%#", MyDateTime{2003, 1, 2, 8, 11, 2, 0}}, // + {"03-01-02 8:11:2.123456", "%y-%m-%d %H:%i:%S.%#", MyDateTime{2003, 1, 2, 8, 11, 2, 0}}, // + {"10:20:10", "%H:%i:%s", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10", "%T", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10", "%h:%i:%s.%f", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10.44AM", "%h:%i:%s.%f%p", MyDateTime{0, 0, 0, 10, 20, 10, 440000}}, // + {"10:20:10AM", "%h:%i:%s%p", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"10:20:10AM", "%r", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"15 MAY 2001", "%d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"15 SEPTEMB 2001", "%d %M %Y", MyDateTime{2001, 9, 15, 0, 0, 0, 0}}, // The SEPTEMB is a broken string of 'SEPTEMBER', ignore this case + {"15 September 2001", "%d %M %Y", MyDateTime{2001, 9, 15, 0, 0, 0, 0}}, // + {"15-01-20", "%d-%m-%y", MyDateTime{2020, 1, 15, 0, 0, 0, 0}}, // + {"15-01-2001", "%d-%m-%Y %H:%i:%S", MyDateTime{2001, 1, 15, 0, 0, 0, 0}}, // + {"15-01-2001 12:59:58", "%d-%m-%Y %H:%i:%S", MyDateTime{2001, 1, 15, 12, 59, 58, 0}}, // + {"15-2001-1", "%d-%Y-%c", MyDateTime{2001, 1, 15, 0, 0, 0, 0}}, // + {"2003-01-02 01:11:12.12345AM", "%Y-%m-%d %h:%i:%S.%f%p", MyDateTime{2003, 1, 2, 1, 11, 12, 123450}}, // + {"2003-01-02 02:11:12.12345AM", "%Y-%m-%d %h:%i:%S.%f %p", MyDateTime{2003, 1, 2, 2, 11, 12, 123450}}, // + 
{"2003-01-02 10:11:12", "%Y-%m-%d %H:%i:%S", MyDateTime{2003, 1, 2, 10, 11, 12, 0}}, // + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %h:%i:%S %p", MyDateTime{2003, 1, 2, 22, 11, 12, 0}}, // + {"2003-01-02 11:11:12Pm", "%Y-%m-%d %h:%i:%S%p", MyDateTime{2003, 1, 2, 23, 11, 12, 0}}, // + {"2003-01-02 12:11:12.12345 am", "%Y-%m-%d %h:%i:%S.%f%p", MyDateTime{2003, 1, 2, 0, 11, 12, 123450}}, // + // some cases that are not implemented + // {"060 2004", "%j %Y", MyDateTime{2004, 2, 29, 0, 0, 0, 0}}, // + // {"15th May 2001", "%D %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"4 53 1998", "%w %u %Y", MyDateTime{1998, 12, 31, 0, 0, 0, 0}}, // + // {"Sund 15 MAY 2001", "%W %d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"Sunday 01 2001", "%W %v %x", MyDateTime{2001, 1, 7, 0, 0, 0, 0}}, // + // {"Sunday 15 MAY 2001", "%W %d %b %Y", MyDateTime{2001, 5, 15, 0, 0, 0, 0}}, // + // {"Thursday 53 1998", "%W %u %Y", MyDateTime{1998, 12, 31, 0, 0, 0, 0}}, // + // {"Tuesday 00 2002", "%W %U %Y", MyDateTime{2002, 1, 1, 0, 0, 0, 0}}, // + // {"Tuesday 52 2001", "%W %V %X", MyDateTime{2002, 1, 1, 0, 0, 0, 0}}, // + // Test 'maybe' date formats and 'strange but correct' results + {"03-01-02 10:11:12 PM", "%Y-%m-%d %h:%i:%S %p", MyDateTime{2003, 1, 2, 22, 11, 12, 0}}, // + {"10:20:10AM", "%h:%i:%s", MyDateTime{0, 0, 0, 10, 20, 10, 0}}, // + {"2003-01-02 10:11:12", "%Y-%m-%d %h:%i:%S", MyDateTime{2003, 1, 2, 10, 11, 12, 0}}, // + // Test wrong dates or conversion specifiers + {"10:20:10AM", "%H:%i:%s%p", std::nullopt}, // + {"15 Ju 2001", "%d %M %Y", std::nullopt}, // + {"15 Septembei 2001", "%d %M %Y", std::nullopt}, // + {"2003-01-02 10:11:12 PM", "%Y-%m-%d %H:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12 PM", "%y-%m-%d %H:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12.123456", "%Y-%m-%d %h:%i:%S %p", std::nullopt}, // + {"2003-01-02 10:11:12AM", "%Y-%m-%d %h:%i:%S.%f %p", std::nullopt}, // + {"2003-01-02 10:11:12AN", "%Y-%m-%d %h:%i:%S%p", std::nullopt}, // + //
{"7 53 1998", "%w %u %Y", std::nullopt}, // + // {"Sund 15 MA", "%W %d %b %Y", std::nullopt}, // + // {"Sunday 01 2001", "%W %v %X", std::nullopt}, // + // {"Thursdai 12 1998", "%W %u %Y", std::nullopt}, // + // {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, // + // {"Tuesday 52 2001", "%W %V %x", std::nullopt}, // + // {"Tuesday 52 2001", "%W %u %x", std::nullopt}, // + }; + auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f"); + size_t idx = 0; + for (const auto & [input, fmt, expected] : cases) + { + MyDateTimeParser parser(fmt); + auto packed = parser.parseAsPackedUInt(input); + if (expected == std::nullopt) + { + MyTimeBase actual_time; + String actual_str; + if (packed) + { + actual_time = MyTimeBase(*packed); + result_formatter.format(actual_time, actual_str); + } + EXPECT_FALSE((bool)packed) // + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [actual=" << actual_str << "]"; + } + else + { + MyTimeBase actual_time; + String actual_str, expect_str; + result_formatter.format(*expected, expect_str); + if (packed) + { + actual_time = MyTimeBase(*packed); + result_formatter.format(actual_time, actual_str); + EXPECT_EQ(*packed, expected->toPackedUInt()) + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [expect=" << expect_str << "] [actual=" << actual_str << "]"; + } + else + { + EXPECT_TRUE((bool)packed) // + << "[case=" << idx << "] " + << "[fmt=" << fmt << "] [input=" << input << "] [expect=" << expect_str << "] [actual=]"; + } + } + idx++; + } +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 4f12f42806e..573bbd5e255 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -912,9 +912,9 @@ std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::TimestampLiteral, "cast"}, //{tipb::ScalarFuncSig::LastDay, "cast"}, - 
//{tipb::ScalarFuncSig::StrToDateDate, "cast"}, - //{tipb::ScalarFuncSig::StrToDateDatetime, "cast"}, - //{tipb::ScalarFuncSig::StrToDateDuration, "cast"}, + {tipb::ScalarFuncSig::StrToDateDate, "strToDateDate"}, + {tipb::ScalarFuncSig::StrToDateDatetime, "strToDateDatetime"}, + // {tipb::ScalarFuncSig::StrToDateDuration, "cast"}, {tipb::ScalarFuncSig::FromUnixTime1Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::FromUnixTime2Arg, "fromUnixTime"}, {tipb::ScalarFuncSig::ExtractDatetime, "extractMyDateTime"}, //{tipb::ScalarFuncSig::ExtractDuration, "cast"}, diff --git a/dbms/src/Flash/Coprocessor/TiDBBit.h b/dbms/src/Flash/Coprocessor/TiDBBit.h index 442198e80da..5c1684a7068 100644 --- a/dbms/src/Flash/Coprocessor/TiDBBit.h +++ b/dbms/src/Flash/Coprocessor/TiDBBit.h @@ -5,7 +5,6 @@ #include #pragma GCC diagnostic pop -#include #include #include #include diff --git a/dbms/src/Flash/Coprocessor/TiDBEnum.h b/dbms/src/Flash/Coprocessor/TiDBEnum.h index 4c1679eba2c..71b98315fe4 100644 --- a/dbms/src/Flash/Coprocessor/TiDBEnum.h +++ b/dbms/src/Flash/Coprocessor/TiDBEnum.h @@ -5,7 +5,6 @@ #include #pragma GCC diagnostic pop -#include #include #include diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index c3a038867c2..cb060c6644a 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include #include +#include +#include #include +#include #include + #include "FunctionsArithmetic.h" @@ -18,8 +19,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co const ColumnConst * res = static_cast(column); - if (checkColumn(&res->getDataColumn()) - || checkColumn(&res->getDataColumn())) + if (checkColumn(&res->getDataColumn()) || checkColumn(&res->getDataColumn())) return res; return {}; @@ -67,13 +67,18 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un } else if (col.column->isColumnConst()) 
{ - const auto & nested_col = static_cast( - static_cast(*col.column).getDataColumn()).getNestedColumnPtr(); + const auto & nested_col = static_cast( // + static_cast(*col.column).getDataColumn()) + .getNestedColumnPtr(); - res.insert({ ColumnConst::create(nested_col, rows), nested_type, col.name}); + res.insert({ColumnConst::create(nested_col, rows), nested_type, col.name}); } else - throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column for DataTypeNullable:" + col.type->getName() + " [column_name=" + col.name + + "] [created=" + DB::toString(bool(col.column)) + + "] [nullable=" + (col.column ? DB::toString(bool(col.column->isColumnNullable())) : "null") + + "] [const=" + (col.column ? DB::toString(bool(col.column->isColumnConst())) : "null") + "]", + ErrorCodes::ILLEGAL_COLUMN); } else res.insert(col); @@ -96,14 +101,11 @@ Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & ar return createBlockWithNestedColumnsImpl(block, args_set); } -bool functionIsInOperator(const String & name) -{ - return name == "in" || name == "notIn" || name == "tidbIn" || name == "tidbNotIn"; -} +bool functionIsInOperator(const String & name) { return name == "in" || name == "notIn" || name == "tidbIn" || name == "tidbNotIn"; } bool functionIsInOrGlobalInOperator(const String & name) { return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn" || name == "tidbIn" || name == "tidbNotIn"; } -} +} // namespace DB diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h index 3ec28334953..8f7846ce324 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -7,8 +7,6 @@ #include #include -#include - #include #include #include diff --git a/dbms/src/Functions/FunctionsConversion.cpp b/dbms/src/Functions/FunctionsConversion.cpp index 0ef4369d4ac..69d18595f76 100644 --- 
a/dbms/src/Functions/FunctionsConversion.cpp +++ b/dbms/src/Functions/FunctionsConversion.cpp @@ -91,6 +91,8 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index d30420db2ad..8a0c88c9ffb 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1588,7 +1588,166 @@ class FunctionDateFormat : public IFunction throw Exception("Second argument for function " + getName() + " must be String constant", ErrorCodes::ILLEGAL_COLUMN); } } +}; + +struct NameStrToDateDate +{ + static constexpr auto name = "strToDateDate"; +}; +struct NameStrToDateDatetime +{ + static constexpr auto name = "strToDateDatetime"; +}; +template +class FunctionStrToDate : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + bool isInjective(const Block &) override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Function " + getName() + " only accept 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + // TODO: Maybe FixedString? 
+ if (!removeNullable(arguments[0].type)->isString()) + throw Exception("First argument for function " + getName() + " must be String, but get " + arguments[0].type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + if (!removeNullable(arguments[1].type)->isString()) + throw Exception( + "Second argument for function " + getName() + " must be String, but get " + arguments[1].type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + if constexpr (std::is_same_v) + { + // Return null for invalid result + // FIXME: set fraction for DataTypeMyDateTime + return makeNullable(std::make_shared()); + } + else if constexpr (std::is_same_v) + { + // Return null for invalid result + return makeNullable(std::make_shared()); + } + else + { + throw Exception("Unknown name for FunctionStrToDate:" + getName(), ErrorCodes::LOGICAL_ERROR); + } + } + + // FIXME: Should we override other method? + bool useDefaultImplementationForConstants() const override { return true; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override + { + const auto & input_column = block.getByPosition(arguments[0]).column; + const size_t num_rows = input_column->size(); + const ColumnString * input_raw_col = nullptr; + if (input_column->isColumnNullable()) + { + auto null_input_column = checkAndGetColumn(input_column.get()); + input_raw_col = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); + } + else + { + input_raw_col = checkAndGetColumn(input_column.get()); + } + auto datetime_column = ColumnVector::create(num_rows); + auto & datetime_res = datetime_column->getData(); + auto null_column = ColumnUInt8::create(num_rows); + auto & null_res = null_column->getData(); + + const auto & format_column = block.getByPosition(arguments[1]).column; + if (format_column->isColumnConst()) + { + // Precompile format parser + const auto & col_const = checkAndGetColumnConst(format_column.get()); + auto format = col_const->getValue(); + auto parser = MyDateTimeParser(format);
+ for (size_t i = 0; i < num_rows; ++i) + { + if (input_column->isColumnNullable()) + { + // For null input, just set the result as null + if (bool is_null = input_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing + } + const auto str_ref = input_raw_col->getDataAt(i); + if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) + { + datetime_res[i] = *parse_res; + null_res[i] = 0; + } + else + { + datetime_res[i] = 0; + null_res[i] = 1; + } + } + } // end of format_column->isColumnConst() + else + { + const ColumnString * format_raw_col = nullptr; + if (format_column->isColumnNullable()) + { + auto null_format_column = checkAndGetColumn(format_column.get()); + format_raw_col = checkAndGetColumn(null_format_column->getNestedColumnPtr().get()); + } + else + { + format_raw_col = checkAndGetColumn(format_column.get()); + } + + for (size_t i = 0; i < num_rows; ++i) + { + // Set null for either null input or null format + if (input_column->isColumnNullable()) + { + if (bool is_null = input_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing + } + if (format_column->isColumnNullable()) + { + if (bool is_null = format_column->isNullAt(i); is_null) + { + null_res[i] = is_null; + continue; + } + // else fallthrough to parsing + } + + const auto format_ref = format_raw_col->getDataAt(i); + auto parser = MyDateTimeParser(format_ref.toString()); + const auto str_ref = input_raw_col->getDataAt(i); + if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) + { + datetime_res[i] = *parse_res; + null_res[i] = 0; + } + else + { + datetime_res[i] = 0; + null_res[i] = 1; + } + } + } // end of !format_column->isColumnConst() + block.getByPosition(result).column = ColumnNullable::create(std::move(datetime_column), std::move(null_column)); + } }; diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp 
b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp index 5b1cb728942..ed1ab3de3b0 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB @@ -101,4 +102,4 @@ void appendIntoHandleColumn(ColumnVector::Container & handle_column, con } } // namespace DM -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h index d5ece791928..e7b3a9930bb 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeHelpers.h @@ -7,9 +7,7 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index b9f08ba7812..597ad10ef9b 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp index 18089f84ac6..c6d2f4922a2 100644 --- a/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp +++ b/dbms/src/Storages/DeltaMerge/Index/MinMaxIndex.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index c82725100df..09ce340d9af 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB::DM { diff --git a/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp b/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp index b458fa79464..036794caf1c 100644 --- 
a/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp +++ b/dbms/src/Storages/DeltaMerge/convertColumnTypeHelpers.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + namespace DM { diff --git a/tests/fullstack-test/expr/str_to_date.test b/tests/fullstack-test/expr/str_to_date.test new file mode 100644 index 00000000000..f9baeb89c27 --- /dev/null +++ b/tests/fullstack-test/expr/str_to_date.test @@ -0,0 +1,70 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a char(64), suite int not null) +mysql> alter table test.t set tiflash replica 1 +mysql> insert into test.t values ('00/00/0000', 1),('13/05/2019', 1),('0/0/2012',1),('abc', 1); +mysql> insert into test.t values ('31/May /2016 12: 34:56.1234', 2),('30/Apr/2016 12:34:56.', 2),('30/Apr/2016 12:34:56.9', 2); +mysql> insert into test.t values ('31 /May/2016 12: 34:56.', 3),('30/Apr/2016 12:34:56', 3); +mysql> insert into test.t values ('31/May/2016', 4),('30/ Apr/ 2016 ', 4),(' 1/Apr/2016 ', 4); + +func> wait_table test t + +# Note that we need to put `str_to_date` in group by to make sure it is pushed down + +## Test suite 1 - Allow zero day +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a ++------------+------------+-----+ +| a | date | cnt | ++------------+------------+-----+ +| 0/0/2012 | 2012-00-00 | 1 | +| 00/00/0000 | 0000-00-00 | 1 | +| 13/05/2019 | 2019-05-13 | 1 | +| abc | 0000-00-00 | 1 | 
++------------+------------+-----+ + +## Test suite 1 - Disallow zero day +# The sql_mode does not affect the result set +#mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a +mysql> set sql_mode='NO_ZERO_IN_DATE,NO_ZERO_DATE'; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, ifnull(str_to_date(a, '%d/%m/%Y'),str_to_date('00/00/0000', '%d/%m/%Y')) as date, count(*) as cnt from test.t where suite = 1 group by a,date order by a ++------------+------------+-----+ +| a | date | cnt | ++------------+------------+-----+ +| 0/0/2012 | NULL | 1 | +| 00/00/0000 | NULL | 1 | +| 13/05/2019 | 2019-05-13 | 1 | +| abc | NULL | 1 | ++------------+------------+-----+ + +## Test suite 2 - showing datetime with fractions +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y %H:%i:%S.%f') as date from test.t where suite = 2 group by a,date order by a ++-----------------------------+----------------------------+ +| a | date | ++-----------------------------+----------------------------+ +| 30/Apr/2016 12:34:56.
| 2016-04-30 12:34:56.000000 | +| 30/Apr/2016 12:34:56.9 | 2016-04-30 12:34:56.900000 | +| 31/May /2016 12: 34:56.1234 | 2016-05-31 12:34:56.123400 | ++-----------------------------+----------------------------+ + +## Test suite 3 - showing datetime without fractions +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, ' %d/%b/%Y %H:%i:%S') as date from test.t where suite = 3 group by a,date order by a ++-------------------------+---------------------+ +| a | date | ++-------------------------+---------------------+ +| 30/Apr/2016 12:34:56 | 2016-04-30 12:34:56 | +| 31 /May/2016 12: 34:56. | 2016-05-31 12:34:56 | ++-------------------------+---------------------+ + +## Test suite 4 - showing date +#mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; explain select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by a,date order by a +mysql> set sql_mode=''; set tidb_allow_mpp=1; set tidb_isolation_read_engines='tiflash'; select a, str_to_date(a, '%d/%b/%Y ') as date from test.t where suite = 4 group by a,date order by a ++---------------+------------+ +| a | date | ++---------------+------------+ +| 1/Apr/2016 | 2016-04-01 | +| 30/ Apr/ 2016 | 2016-04-30 | +| 31/May/2016 | 2016-05-31 | ++---------------+------------+ + +mysql> drop table if exists test.t