Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix date format identifies '\n' as invalid separator (#4046) #4056

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 44 additions & 44 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ bool isValidSeperator(char c, int previous_parts)
if (isPunctuation(c))
return true;

return previous_parts == 2 && (c == ' ' || c == 'T');
// for https://github.com/pingcap/tics/issues/4036
return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c));
}

std::vector<String> parseDateFormat(String format)
Expand Down Expand Up @@ -508,8 +509,8 @@ Field parseMyDateTime(const String & str, int8_t fsp)

bool truncated_or_incorrect = false;

// noAbsorb tests if can absorb FSP or TZ
auto noAbsorb = [](const std::vector<String> & seps) {
// no_absorb tests if can absorb FSP or TZ
auto no_absorb = [](const std::vector<String> & seps) {
// if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed
// or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can
// not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because
Expand All @@ -519,7 +520,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)

if (!frac_str.empty())
{
if (!noAbsorb(seps))
if (!no_absorb(seps))
{
seps.push_back(frac_str);
frac_str = "";
Expand All @@ -530,7 +531,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
// if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == ""))
if (!no_absorb(seps) && !(!tz_minute.empty() && tz_sep.empty()))
{
// we can't absorb timezone if there is no separate between tz_hour and tz_minute
if (!tz_hour.empty())
Expand All @@ -555,51 +556,51 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
case 14: // YYYYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
hhmmss = true;
break;
}
case 12: // YYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 11: // YYMMDDHHMMS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 10: // YYMMDDHHMM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 9: // YYMMDDHHM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 8: // YYYYMMDD
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); //NOLINT
break;
}
case 7: // YYMMDDH
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); //NOLINT
year = adjustYear(year);
break;
}
case 6: // YYMMDD
case 5: // YYMMD
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); //NOLINT
year = adjustYear(year);
break;
}
Expand All @@ -623,18 +624,18 @@ Field parseMyDateTime(const String & str, int8_t fsp)
case 1:
case 2:
{
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour);
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT
break;
}
case 3:
case 4:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute);
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT
break;
}
default:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second);
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT
break;
}
}
Expand All @@ -648,7 +649,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
}
else
{
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0);
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); //NOLINT
}
}
if (truncated_or_incorrect)
Expand Down Expand Up @@ -944,7 +945,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
{
constexpr static UInt8 max_days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
static auto is_leap_year = [](UInt16 _year) { return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0); };
max_day = max_days_in_month[month - 1];
max_day = max_days_in_month[month - 1]; // NOLINT
if (month == 2 && is_leap_year(year))
{
max_day = 29;
Expand All @@ -969,7 +970,6 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
{
throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue);
}
return;
}

bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute,
Expand Down Expand Up @@ -1270,27 +1270,27 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
return ParseState::FAIL;
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1299,7 +1299,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 12 || hour == 0)
Expand All @@ -1311,23 +1311,23 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
return ParseState::FAIL;
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1336,7 +1336,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += step; // move forward

int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
// "AM"/"PM" must be parsed as a single element
// "11:13:56a" is an invalid input for "%r".
Expand All @@ -1362,7 +1362,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += 2; // move forward
return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand All @@ -1374,27 +1374,27 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parse_sep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
return ParseState::FAIL;
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1403,31 +1403,31 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 23)
return ParseState::FAIL;
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
return ParseState::FAIL;
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1437,7 +1437,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)

return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand Down