From bcaaec9c675c886a8acb30450b018c8fc2b0d7e3 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Sun, 4 Feb 2024 17:57:22 +0800 Subject: [PATCH 1/2] two pr --- be/src/vec/functions/function.h | 12 +- be/src/vec/runtime/vdatetime_value.cpp | 280 +++++++++------- be/src/vec/runtime/vdatetime_value.h | 2 +- .../function/function_array_index_test.cpp | 14 +- be/test/vec/function/function_test_util.h | 19 +- .../date-time-functions/date-format.md | 10 +- .../date-time-functions/str-to-date.md | 10 +- .../date-time-functions/date-format.md | 4 +- .../date-time-functions/str-to-date.md | 10 +- .../apache/doris/analysis/DateLiteral.java | 311 ++++++++++-------- .../data/correctness/test_str_to_date.out | 24 ++ .../datatype_p0/date/test_invalid_date.out | 7 - .../correctness/test_str_to_date.groovy | 23 +- .../datatype_p0/date/test_invalid_date.groovy | 37 --- 14 files changed, 410 insertions(+), 353 deletions(-) delete mode 100644 regression-test/data/datatype_p0/date/test_invalid_date.out delete mode 100644 regression-test/suites/datatype_p0/date/test_invalid_date.groovy diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 6a9d83d9669a9f..aa597eed4ea0cb 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -367,9 +367,11 @@ class FunctionBuilderImpl : public IFunctionBuilder { bool is_stateful() const override { return false; } bool is_variadic() const override { return false; } - /// Default implementation. Will check only in non-variadic case. + // Default implementation. Will check only in non-variadic case. void check_number_of_arguments(size_t number_of_arguments) const override; - + // the return type should be same with what FE plans. 
+ // it returns: `get_return_type_impl` if `use_default_implementation_for_nulls` = false + // `get_return_type_impl` wrapped in NULL if `use_default_implementation_for_nulls` = true and input has NULL DataTypePtr get_return_type(const ColumnsWithTypeAndName& arguments) const; DataTypes get_variadic_argument_types() const override { @@ -383,7 +385,9 @@ class FunctionBuilderImpl : public IFunctionBuilder { } protected: - /// Get the result type by argument type. If the function does not apply to these arguments, throw an exception. + // Get the result type by argument type. If the function does not apply to these arguments, throw an exception. + // the get_return_type_impl and its overrides should only return the nested type if `use_default_implementation_for_nulls` is true. + // whether to wrap in nullable type will be automatically decided. virtual DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const { DataTypes data_types(arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; @@ -400,7 +404,7 @@ class FunctionBuilderImpl : public IFunctionBuilder { * if some of arguments are Nullable(Nothing) then don't call get_return_type(), call build_impl() with return_type = Nullable(Nothing), * if some of arguments are Nullable, then: * - Nullable types are substituted with nested types for get_return_type() function - * - wrap get_return_type() result in Nullable type and pass to build_impl + * - WRAP get_return_type() RESULT IN NULLABLE type and pass to build_impl * * Otherwise build returns build_impl(arguments, get_return_type(arguments)); */ diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index e7c8d0915d9552..47b77b3d6bf9e1 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -19,19 +19,18 @@ #include #include -#include #include -#include -#include -#include + +#include +#include +#include +#include // IWYU
pragma: no_include #include -#include #include // IWYU pragma: keep // IWYU pragma: no_include #include -#include -#include +#include #include #include "common/compiler_util.h" @@ -392,10 +391,7 @@ bool VecDateTimeValue::from_time_int64(int64_t value) { return false; } _second = value % 100; - if (_second > TIME_MAX_SECOND) { - return false; - } - return true; + return _second <= TIME_MAX_SECOND; } char* VecDateTimeValue::append_date_buffer(char* to) const { @@ -685,7 +681,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } char buf[64]; char* cursor = buf; - char* pos = NULL; + char* pos = nullptr; const char* ptr = format; const char* end = format + len; char ch = '\0'; @@ -1144,12 +1140,9 @@ static bool str_to_int64(const char* ptr, const char** endptr, int64_t* ret) { uint64_t value_3 = 0; // Check overflow. - if (value_1 > cutoff_1 || - (value_1 == cutoff_1 && - (value_2 > cutoff_2 || (value_2 == cutoff_2 && value_3 > cutoff_3)))) { - return false; - } - return true; + return value_1 <= cutoff_1 && + (value_1 != cutoff_1 || + (value_2 <= cutoff_2 && (value_2 != cutoff_2 || value_3 <= cutoff_3))); } static int min(int a, int b) { @@ -1160,7 +1153,7 @@ static int find_in_lib(const char* lib[], const char* str, const char* end) { int pos = 0; int find_count = 0; int find_pos = 0; - for (; lib[pos] != NULL; ++pos) { + for (; lib[pos] != nullptr; ++pos) { const char* i = str; const char* j = lib[pos]; while (i < end && *j) { @@ -1198,26 +1191,41 @@ static int check_word(const char* lib[], const char* str, const char* end, const // change this method should also change that. 
bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, const char* value, int value_len, const char** sub_val_end) { + if (value_len <= 0) [[unlikely]] { + return false; + } const char* ptr = format; const char* end = format + format_len; const char* val = value; const char* val_end = value + value_len; - bool date_part_used = false; - bool time_part_used = false; - bool frac_part_used = false; - bool already_set_time_part = false; - - int day_part = 0; + bool already_set_time_part = false; // skip time part in the end's setting. + + uint32_t part_used = 0; + constexpr int YEAR_PART = 1U << 0; + constexpr int MONTH_PART = 1U << 1; + constexpr int DAY_PART = 1U << 2; + constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART; + constexpr int WEEKDAY_PART = 1U << 3; + constexpr int YEARDAY_PART = 1U << 4; + constexpr int WEEK_NUM_PART = 1U << 5; + constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | WEEK_NUM_PART; + [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | SPECIAL_DATE_PART; + constexpr int HOUR_PART = 1U << 6; + constexpr int MINUTE_PART = 1U << 7; + constexpr int SECOND_PART = 1U << 8; + constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART; + + int half_day = 0; // 0 for am/none, 12 for pm. 
int weekday = -1; int yearday = -1; - int week_num = -1; + int week_num = -1; // week idx in one year bool strict_week_number = false; bool sunday_first = false; bool strict_week_number_year_type = false; int strict_week_number_year = -1; - bool usa_time = false; + bool hour_system_12 = false; auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { @@ -1230,7 +1238,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } // Check switch if (*ptr == '%' && ptr + 1 < end) { - const char* tmp = NULL; + const char* tmp = nullptr; int64_t int_value = 0; ptr++; switch (*ptr++) { @@ -1244,7 +1252,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, int_value += int_value >= 70 ? 1900 : 2000; year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; case 'Y': // Year, numeric, four digits @@ -1257,7 +1265,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; // Month case 'm': @@ -1268,7 +1276,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } month = int_value; val = tmp; - date_part_used = true; + part_used |= MONTH_PART; break; case 'M': int_value = check_word(const_cast(s_month_name), val, val_end, &val); @@ -1276,6 +1284,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } month = int_value; + part_used |= MONTH_PART; break; case 'b': int_value = check_word(s_ab_month_name, val, val_end, &val); @@ -1283,6 +1292,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } month = int_value; + part_used |= MONTH_PART; break; // Day case 'd': @@ -1293,7 +1303,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } day = int_value; val = tmp; - 
date_part_used = true; + part_used |= DAY_PART; break; case 'D': tmp = val + min(2, val_end - val); @@ -1302,13 +1312,14 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } day = int_value; val = tmp + min(2, val_end - tmp); - date_part_used = true; + part_used |= DAY_PART; break; // Hour case 'h': case 'I': case 'l': - usa_time = true; + hour_system_12 = true; + part_used |= HOUR_PART; // Fall through case 'k': case 'H': @@ -1318,7 +1329,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } hour = int_value; val = tmp; - time_part_used = true; + part_used |= HOUR_PART; break; // Minute case 'i': @@ -1328,7 +1339,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } minute = int_value; val = tmp; - time_part_used = true; + part_used |= MINUTE_PART; break; // Second case 's': @@ -1339,7 +1350,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } second = int_value; val = tmp; - time_part_used = true; + part_used |= SECOND_PART; break; // Micro second case 'f': @@ -1350,16 +1361,15 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } val = tmp; break; - // AM/PM + // AM/PM, only meaningful for 12-hour system. 
case 'p': - if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) { + if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !hour_system_12) { return false; } if (toupper(*val) == 'P') { // PM - day_part = 12; + half_day = 12; } - time_part_used = true; val += 2; break; // Weekday @@ -1370,7 +1380,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'a': int_value = check_word(s_ab_day_name, val, val_end, &val); @@ -1379,7 +1389,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'w': tmp = val + min(1, val_end - val); @@ -1394,7 +1404,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } weekday = int_value; val = tmp; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'j': tmp = val + min(3, val_end - val); @@ -1403,7 +1413,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } yearday = int_value; val = tmp; - date_part_used = true; + part_used |= YEARDAY_PART; break; case 'u': case 'v': @@ -1421,7 +1431,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; // strict week number, must be used with %V or %v case 'x': @@ -1433,7 +1443,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } strict_week_number_year = int_value; val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; case 'r': { VecDateTimeValue tmp_val; @@ -1444,7 +1454,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, this->_minute = tmp_val._minute; this->_second = tmp_val._second; val = tmp; - time_part_used = true; + part_used |= 
TIME_PART; already_set_time_part = true; break; } @@ -1456,7 +1466,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, this->_hour = tmp_val._hour; this->_minute = tmp_val._minute; this->_second = tmp_val._second; - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; val = tmp; break; @@ -1496,8 +1506,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } } - // continue to iterate pattern if has - // to find out if it has time part. + // for compatible with mysql, like something have %H:%i:%s format but no relative content... while (ptr < end) { if (*ptr == '%' && ptr + 1 < end) { ptr++; @@ -1510,10 +1519,11 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, case 'l': case 'r': case 's': + case 'f': case 'S': case 'p': case 'T': - time_part_used = true; + part_used |= TIME_PART; break; default: break; @@ -1523,33 +1533,29 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } } - if (usa_time) { + if (!part_used) { + return false; + } + + if (hour_system_12) { if (hour > 12 || hour < 1) { return false; } - hour = (hour % 12) + day_part; + hour = (hour % 12) + half_day; } if (sub_val_end) { *sub_val_end = val; } // Compute timestamp type - if (frac_part_used) { - if (date_part_used) { + if (part_used & DATE_PART) { + if (part_used & TIME_PART) { _type = TIME_DATETIME; } else { - _type = TIME_TIME; + _type = TIME_DATE; } } else { - if (date_part_used) { - if (time_part_used) { - _type = TIME_DATETIME; - } else { - _type = TIME_DATE; - } - } else { - _type = TIME_TIME; - } + _type = TIME_TIME; } _neg = false; @@ -1588,11 +1594,25 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, // so we only need to set date part // 3. 
if both are true, means all part of date_time be set, no need check_range_and_set_time bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0); - if (already_set_date_part && already_set_time_part) return true; - if (already_set_date_part) + if (already_set_date_part && already_set_time_part) { + return true; + } + // complete default month/day + if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only + if (!(part_used & DAY_PART)) { + day = 1; + if (!(part_used & MONTH_PART)) { + month = 1; + } + } + } + + if (already_set_date_part) { return check_range_and_set_time(_year, _month, _day, hour, minute, second, _type); - if (already_set_time_part) + } + if (already_set_time_part) { return check_range_and_set_time(year, month, day, _hour, _minute, _second, _type); + } return check_range_and_set_time(year, month, day, hour, minute, second, _type); } @@ -1755,7 +1775,7 @@ void VecDateTimeValue::from_unixtime(int64_t timestamp, const cctz::time_zone& c const char* VecDateTimeValue::month_name() const { if (_month < 1 || _month > 12) { - return NULL; + return nullptr; } return s_month_name[_month]; } @@ -1763,14 +1783,14 @@ const char* VecDateTimeValue::month_name() const { const char* VecDateTimeValue::day_name() const { int day = weekday(); if (day < 0 || day >= 7) { - return NULL; + return nullptr; } return s_day_name[day]; } VecDateTimeValue VecDateTimeValue::local_time() { VecDateTimeValue value; - value.from_unixtime(time(NULL), TimezoneUtils::default_time_zone); + value.from_unixtime(time(nullptr), TimezoneUtils::default_time_zone); return value; } @@ -2154,17 +2174,33 @@ void DateV2Value::set_zero() { template bool DateV2Value::from_date_format_str(const char* format, int format_len, const char* value, int value_len, const char** sub_val_end) { + if (value_len <= 0) [[unlikely]] { + return false; + } const char* ptr = format; const char* end = format + format_len; const char* val = value; const char* val_end = value + value_len; - bool 
date_part_used = false; - bool time_part_used = false; - bool frac_part_used = false; - bool already_set_time_part = false; - - int day_part = 0; + bool already_set_time_part = false; // skip time part in the end's setting. + + uint32_t part_used = 0; + constexpr int YEAR_PART = 1U << 0; + constexpr int MONTH_PART = 1U << 1; + constexpr int DAY_PART = 1U << 2; + constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART; + constexpr int WEEKDAY_PART = 1U << 3; + constexpr int YEARDAY_PART = 1U << 4; + constexpr int WEEK_NUM_PART = 1U << 5; + constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | WEEK_NUM_PART; + [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | SPECIAL_DATE_PART; + constexpr int HOUR_PART = 1U << 6; + constexpr int MINUTE_PART = 1U << 7; + constexpr int SECOND_PART = 1U << 8; + constexpr int FRAC_PART = 1U << 9; + constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART | FRAC_PART; + + int half_day = 0; // 0 for am/none, 12 for pm. int weekday = -1; int yearday = -1; int week_num = -1; @@ -2173,7 +2209,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co bool sunday_first = false; bool strict_week_number_year_type = false; int strict_week_number_year = -1; - bool usa_time = false; + bool hour_system_12 = false; auto [year, month, day, hour, minute, second, microsecond] = std::tuple {0, 0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { @@ -2186,7 +2222,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } // Check switch if (*ptr == '%' && ptr + 1 < end) { - const char* tmp = NULL; + const char* tmp = nullptr; int64_t int_value = 0; ptr++; switch (*ptr++) { @@ -2200,7 +2236,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co int_value += int_value >= 70 ? 
1900 : 2000; year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; case 'Y': // Year, numeric, four digits @@ -2213,7 +2249,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; // Month case 'm': @@ -2224,7 +2260,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } month = int_value; val = tmp; - date_part_used = true; + part_used |= MONTH_PART; break; case 'M': int_value = check_word(const_cast(s_month_name), val, val_end, &val); @@ -2232,6 +2268,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } month = int_value; + part_used |= MONTH_PART; break; case 'b': int_value = check_word(s_ab_month_name, val, val_end, &val); @@ -2239,6 +2276,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } month = int_value; + part_used |= MONTH_PART; break; // Day case 'd': @@ -2249,7 +2287,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } day = int_value; val = tmp; - date_part_used = true; + part_used |= DAY_PART; break; case 'D': tmp = val + min(2, val_end - val); @@ -2258,13 +2296,14 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } day = int_value; val = tmp + min(2, val_end - tmp); - date_part_used = true; + part_used |= DAY_PART; break; // Hour case 'h': case 'I': case 'l': - usa_time = true; + hour_system_12 = true; + part_used |= HOUR_PART; // Fall through case 'k': case 'H': @@ -2274,7 +2313,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } hour = int_value; val = tmp; - time_part_used = true; + part_used |= HOUR_PART; break; // Minute case 'i': @@ -2284,7 +2323,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } minute = int_value; val = tmp; - time_part_used = 
true; + part_used |= MINUTE_PART; break; // Second case 's': @@ -2295,7 +2334,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } second = int_value; val = tmp; - time_part_used = true; + part_used |= SECOND_PART; break; // Micro second case 'f': @@ -2317,21 +2356,19 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } if constexpr (is_datetime) { microsecond = int_value * int_exp10(6 - min(6, tmp - val)); - frac_part_used = true; + part_used |= FRAC_PART; } val = tmp; - time_part_used = true; break; // AM/PM case 'p': - if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) { + if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !hour_system_12) { return false; } if (toupper(*val) == 'P') { // PM - day_part = 12; + half_day = 12; } - time_part_used = true; val += 2; break; // Weekday @@ -2342,7 +2379,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'a': int_value = check_word(s_ab_day_name, val, val_end, &val); @@ -2351,7 +2388,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'w': tmp = val + min(1, val_end - val); @@ -2366,7 +2403,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } weekday = int_value; val = tmp; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'j': tmp = val + min(3, val_end - val); @@ -2375,7 +2412,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } yearday = int_value; val = tmp; - date_part_used = true; + part_used |= YEARDAY_PART; break; case 'u': case 'v': @@ -2393,7 +2430,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } val = tmp; - date_part_used = true; + 
part_used |= WEEK_NUM_PART; break; // strict week number, must be used with %V or %v case 'x': @@ -2405,7 +2442,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } strict_week_number_year = int_value; val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; case 'r': { if constexpr (is_datetime) { @@ -2418,7 +2455,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co this->date_v2_value_.minute_ = tmp_val.minute(); this->date_v2_value_.second_ = tmp_val.second(); val = tmp; - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; break; } else { @@ -2434,7 +2471,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co this->date_v2_value_.hour_ = tmp_val.hour(); this->date_v2_value_.minute_ = tmp_val.minute(); this->date_v2_value_.second_ = tmp_val.second(); - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; val = tmp; break; @@ -2477,12 +2514,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } } - // ptr == format means input value string is "", not do parse format failed here - if (ptr == format) { - return false; - } - // continue to iterate pattern if has - // to find out if it has time part. + // for compatible with mysql, like something have %H:%i:%s format but no relative content... 
while (ptr < end) { if (*ptr == '%' && ptr + 1 < end) { ptr++; @@ -2499,7 +2531,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co case 'S': case 'p': case 'T': - time_part_used = true; + part_used |= TIME_PART; break; default: break; @@ -2509,25 +2541,27 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } } - if (usa_time) { + if (!part_used) { + return false; + } + + if (hour_system_12) { if (hour > 12 || hour < 1) { return false; } - hour = (hour % 12) + day_part; + hour = (hour % 12) + half_day; } if (sub_val_end) { *sub_val_end = val; } // Compute timestamp type - if (frac_part_used) { + if (part_used & FRAC_PART) { if constexpr (!is_datetime) { - LOG(WARNING) << "Microsecond is not allowed for date type!"; return false; } - } else if (time_part_used) { + } else if (part_used & TIME_PART) { if constexpr (!is_datetime) { - LOG(WARNING) << "Time part is not allowed for date type!"; return false; } } @@ -2566,7 +2600,9 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co // so we only need to set date part // 3. 
if both are true, means all part of date_time be set, no need check_range_and_set_time bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0); - if (already_set_date_part && already_set_time_part) return true; + if (already_set_date_part && already_set_time_part) { + return true; + } if (already_set_date_part) { if constexpr (is_datetime) { return check_range_and_set_time(date_v2_value_.year_, date_v2_value_.month_, @@ -2576,6 +2612,16 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co date_v2_value_.day_, 0, 0, 0, 0); } } + // complete default month/day + if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only + if (!(part_used & DAY_PART)) { + day = 1; + if (!(part_used & MONTH_PART)) { + month = 1; + } + } + } + if (already_set_time_part) { if constexpr (is_datetime) { return check_range_and_set_time(year, month, day, date_v2_value_.hour_, @@ -2587,7 +2633,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } if constexpr (is_datetime) { return check_range_and_set_time(year, month, day, hour, minute, second, microsecond, - time_part_used && !date_part_used); + !(part_used & ~TIME_PART)); } else { return check_range_and_set_time(year, month, day, 0, 0, 0, 0); } diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index e043aa9028d689..e3ea87de1f2c39 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -201,7 +201,7 @@ static constexpr uint32_t DATEV2_YEAR_WIDTH = 23; static constexpr uint32_t DATETIMEV2_YEAR_WIDTH = 18; static constexpr uint32_t DATETIMEV2_MONTH_WIDTH = 4; -static RE2 time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$"); +static RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); uint8_t mysql_week_mode(uint32_t mode); diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp index 497826019df627..bb61a7195cc117 100644 
--- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -115,10 +115,8 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; - Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("", false), - str_to_date_time("2022-07-08", false)}; + Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("2022-07-08", false)}; DataSet data_set = {{{vec, std::string("2022-01-02")}, UInt8(1)}, - {{vec, std::string("")}, UInt8(1)}, {{vec, std::string("2022-01-03")}, UInt8(0)}, {{Null(), std::string("2022-01-04")}, Null()}, {{empty_arr, std::string("2022-01-02")}, UInt8(0)}}; @@ -130,10 +128,9 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; - Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""), + Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time("2022-07-08 00:00:00")}; DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, UInt8(1)}, - {{vec, std::string("")}, UInt8(1)}, {{vec, std::string("2022-01-03 00:00:00")}, UInt8(0)}, {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, {{empty_arr, std::string("2022-01-02 00:00:00")}, UInt8(0)}}; @@ -217,10 +214,8 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; - Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("", false), - str_to_date_time("2022-07-08", false)}; + Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("2022-07-08", false)}; DataSet data_set = {{{vec, std::string("2022-01-02")}, Int64(1)}, - {{vec, std::string("")}, Int64(2)}, {{vec, std::string("2022-01-03")}, Int64(0)}, {{Null(), std::string("2022-01-04")}, Null()}, {{empty_arr, std::string("2022-01-02")}, Int64(0)}}; @@ -232,10 
+227,9 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; - Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""), + Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time("2022-07-08 00:00:00")}; DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, Int64(1)}, - {{vec, std::string("")}, Int64(2)}, {{vec, std::string("2022-01-03 00:00:00")}, Int64(0)}, {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, {{empty_arr, std::string("2022-01-02 00:00:00")}, Int64(0)}}; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index f7b72c935f354e..d62f411ee4c64d 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -52,18 +52,13 @@ #include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" -namespace doris { -namespace vectorized { +namespace doris::vectorized { + class DataTypeJsonb; class DataTypeTime; class TableFunction; template class DataTypeDecimal; -} // namespace vectorized -} // namespace doris - -namespace doris::vectorized { - using InputDataSet = std::vector>; // without result using CellSet = std::vector; using Expect = AnyType; @@ -71,6 +66,7 @@ using Row = std::pair; using DataSet = std::vector; using InputTypeSet = std::vector; +// FIXME: should use exception or expected to deal with null values. int64_t str_to_date_time(std::string datetime_str, bool data_time = true); uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format); uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_format); @@ -300,7 +296,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty if constexpr (std::is_same_v) { const auto& expect_data = any_cast(data_set[i].second); auto s = column->get_data_at(i); - if (expect_data.size() == 0) { + if
(expect_data.empty()) { // zero size result means invalid EXPECT_EQ(0, s.size) << " invalid result size should be 0 at row " << i; } else { @@ -321,7 +317,8 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty } else if constexpr (std::is_same_v) { const ColumnBitmap* bitmap_col = nullptr; if constexpr (nullable) { - auto nullable_column = assert_cast(column.get()); + const auto* nullable_column = + assert_cast(column.get()); bitmap_col = assert_cast( nullable_column->get_nested_column_ptr().get()); } else { @@ -344,7 +341,9 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty if constexpr (nullable) { bool is_null = data_set[i].second.type() == &typeid(Null); EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i; - if (!is_null) check_column_data(); + if (!is_null) { + check_column_data(); + } } else { check_column_data(); } diff --git a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md index ab86ec15c2befd..54f2acd7597656 100644 --- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md +++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md @@ -70,23 +70,23 @@ The formats available are: % m | month, numerical value (00-12) -%p | AM or PM +% p | AM or PM, only available in the 12-hour system -% R | Time, 12 - hour (hh: mm: SS AM or PM) +% r | Time, 12-hour (hh:mm:ss), with or without an AM/PM marker % S | seconds (00-59) % s | seconds (00-59) -% T | Time, 24 - hour (hh: mm: ss) +% T | Time, 24-hour (hh:mm:ss) % U | Week (00-53) Sunday is the first day of the week -% U | Week (00 - 53) Monday is the first day of the week +% u | Week (00-53) Monday is the first day of the week % V | Week (01-53) Sunday is the first day of the week, and% X is used.
-% v | Week (01 - 53) Monday is the first day of the week, and% x is used +% v | Week (01-53) Monday is the first day of the week, and% x is used % W | Sunday diff --git a/docs/en/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md b/docs/en/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md index 1e2a7942cd0dce..4b46dc25543600 100644 --- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md +++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md @@ -33,7 +33,7 @@ under the License. Convert STR to DATE type by format specified, if the conversion result does not return NULL. Note that the 'format' parameter specifies the format of the first parameter. -The `format` supported is consistent with [date_format](date_format.md) +All formats in [date_format](./date-format) are supported. In addition, support auto completing the remainder of date part for '%Y' and '%Y-%m'. ### example @@ -65,7 +65,13 @@ mysql> select str_to_date("2020-09-01", "%Y-%m-%d %H:%i:%s"); +------------------------------------------------+ | 2020-09-01 00:00:00 | +------------------------------------------------+ -1 row in set (0.01 sec) + +mysql> select str_to_date('2023','%Y'); ++---------------------------+ +| str_to_date('2023', '%Y') | ++---------------------------+ +| 2023-01-01 | ++---------------------------+ ``` ### keywords diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md index 69a6315d729f74..a5fb25f29861dc 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md @@ -70,9 +70,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。 %m | 月,数值(00-12) -%p | AM 或 PM +%p | AM 或 PM,仅在采用12小时制时可用。 -%r | 时间,12-小时(hh:mm:ss AM 或 PM) +%r | 时间,12-小时(hh:mm:ss),可以包含或不包含AM/PM。 %S | 秒(00-59) diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md index fc15fc31a68fe2..3c219a29fa92fa 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/str-to-date.md @@ -32,7 +32,7 @@ under the License. 通过format指定的方式将str转化为DATE类型,如果转化结果不对返回NULL。注意format指定的是第一个参数的格式。 -支持的format格式与[date_format](date_format.md)一致 +支持[date_format](./date-format)中的所有 format 格式,此外对于 '%Y' 和 '%Y-%m',支持补齐日期剩余部分。 ### example @@ -64,7 +64,13 @@ mysql> select str_to_date("2020-09-01", "%Y-%m-%d %H:%i:%s"); +------------------------------------------------+ | 2020-09-01 00:00:00 | +------------------------------------------------+ -1 row in set (0.01 sec) + +mysql> select str_to_date('2023','%Y'); ++---------------------------+ +| str_to_date('2023', '%Y') | ++---------------------------+ +| 2023-01-01 | ++---------------------------+ ``` ### keywords diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index 7d924b0e68c92d..2816762f9437bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -1175,16 +1175,28 @@ public int hashCode() { // this method is exaclty same as from_date_format_str() in be/src/runtime/datetime_value.cpp // change this method should also change that. 
public int fromDateFormatStr(String format, String value, boolean hasSubVal) throws InvalidFormatException { - int fp = 0; // pointer to the current format string - int fend = format.length(); // end of format string - int vp = 0; // pointer to the date string value - int vend = value.length(); // end of date string value - - boolean datePartUsed = false; - boolean timePartUsed = false; - boolean microSecondPartUsed = false; - - int dayPart = 0; + int pFormat = 0; // pointer to the current format string + int endFormat = format.length(); // end of format string + int pValue = 0; // pointer to the date string value + int endValue = value.length(); // end of date string value + + int partUsed = 0; + final int yearPart = 1 << 0; + final int monthPart = 1 << 1; + final int dayPart = 1 << 2; + final int weekdayPart = 1 << 3; + final int yeardayPart = 1 << 4; + final int weekNumPart = 1 << 5; + final int normalDatePart = yearPart | monthPart | dayPart; + final int specialDatePart = weekdayPart | yeardayPart | weekNumPart; + final int datePart = normalDatePart | specialDatePart; + final int hourPart = 1 << 6; + final int minutePart = 1 << 7; + final int secondPart = 1 << 8; + final int fracPart = 1 << 9; + final int timePart = hourPart | minutePart | secondPart | fracPart; + + int halfDay = 0; // 0 for am/none, 12 for pm. 
long weekday = -1; long yearday = -1; long weekNum = -1; @@ -1193,169 +1205,170 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr boolean sundayFirst = false; boolean strictWeekNumberYearType = false; long strictWeekNumberYear = -1; - boolean usaTime = false; + boolean hourSystem12 = false; // hour in [0..12] and with am/pm - char f; - while (fp < fend && vp < vend) { + char now; + while (pFormat < endFormat && pValue < endValue) { // Skip space character - while (vp < vend && Character.isSpaceChar(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isSpaceChar(value.charAt(pValue))) { + pValue++; } - if (vp >= vend) { + if (pValue >= endValue) { break; } // Check switch - f = format.charAt(fp); - if (f == '%' && fp + 1 < fend) { + now = format.charAt(pFormat); + if (now == '%' && pFormat + 1 < endFormat) { int tmp = 0; long intValue = 0; - fp++; - f = format.charAt(fp); - fp++; - switch (f) { + pFormat++; + now = format.charAt(pFormat); + pFormat++; + switch (now) { // Year case 'y': // Year, numeric (two digits) - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); intValue += intValue >= 70 ? 1900 : 2000; this.year = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yearPart; break; case 'Y': // Year, numeric, four digits - tmp = vp + Math.min(4, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); - if (tmp - vp <= 2) { + tmp = pValue + Math.min(4, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); + if (tmp - pValue <= 2) { intValue += intValue >= 70 ? 
1900 : 2000; } this.year = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yearPart; break; // Month case 'm': case 'c': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.month = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= monthPart; break; case 'M': { - int nextPos = findWord(value, vp); - intValue = checkWord(MONTH_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(MONTH_NAME_DICT, value.substring(pValue, nextPos)); this.month = intValue; - vp = nextPos; + pValue = nextPos; + partUsed |= monthPart; break; } case 'b': { - int nextPos = findWord(value, vp); - intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(pValue, nextPos)); this.month = intValue; - vp = nextPos; + pValue = nextPos; + partUsed |= monthPart; break; } // Day case 'd': case 'e': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.day = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= dayPart; break; case 'D': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.day = intValue; - vp = tmp + Math.min(2, vend - tmp); - datePartUsed = true; + pValue = tmp + Math.min(2, endValue - tmp); + partUsed |= dayPart; break; // Hour case 'h': case 'I': case 'l': - usaTime = true; + hourSystem12 = true; + partUsed |= hourPart; case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through case 'H': - tmp = findNumber(value, vp, 2); - 
intValue = strToLong(value.substring(vp, tmp)); + tmp = findNumber(value, pValue, 2); + intValue = strToLong(value.substring(pValue, tmp)); this.hour = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= hourPart; break; // Minute case 'i': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.minute = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= minutePart; break; // Second case 's': case 'S': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.second = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= secondPart; break; // Micro second case 'f': - // FIXME: fix same with BE - tmp = vp; + tmp = pValue; // when there's still something to the end, fix the scale of ms. 
- while (tmp < vend && Character.isDigit(value.charAt(tmp))) { + while (tmp < endValue && Character.isDigit(value.charAt(tmp))) { tmp += 1; } - if (tmp - vp > 6) { - int tmp2 = vp + 6; - intValue = strToLong(value.substring(vp, tmp2)); + if (tmp - pValue > 6) { + int tmp2 = pValue + 6; + intValue = strToLong(value.substring(pValue, tmp2)); } else { - intValue = strToLong(value.substring(vp, tmp)); + intValue = strToLong(value.substring(pValue, tmp)); } - this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - vp))); - timePartUsed = true; - microSecondPartUsed = true; - vp = tmp; + this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - pValue))); + partUsed |= fracPart; + pValue = tmp; break; // AM/PM case 'p': - if ((vend - vp) < 2 || Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) { + if ((endValue - pValue) < 2 || Character.toUpperCase(value.charAt(pValue + 1)) != 'M' + || !hourSystem12) { throw new InvalidFormatException("Invalid %p format"); } - if (Character.toUpperCase(value.charAt(vp)) == 'P') { + if (Character.toUpperCase(value.charAt(pValue)) == 'P') { // PM - dayPart = 12; + halfDay = 12; } - timePartUsed = true; - vp += 2; + pValue += 2; break; // Weekday case 'W': { - int nextPos = findWord(value, vp); - intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos)); intValue++; weekday = intValue; - datePartUsed = true; + partUsed |= weekdayPart; break; } case 'a': { - int nextPos = findWord(value, vp); - intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos)); intValue++; weekday = intValue; - datePartUsed = true; + partUsed |= weekdayPart; break; } case 'w': - tmp = vp + Math.min(1, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); 
+ tmp = pValue + Math.min(1, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); if (intValue >= 7) { throw new InvalidFormatException("invalid day of week: " + intValue); } @@ -1363,97 +1376,97 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr intValue = 7; } weekday = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekdayPart; break; case 'j': - tmp = vp + Math.min(3, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(3, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); yearday = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yeardayPart; break; case 'u': case 'v': case 'U': case 'V': - sundayFirst = (format.charAt(fp - 1) == 'U' || format.charAt(fp - 1) == 'V'); + sundayFirst = (format.charAt(pFormat - 1) == 'U' || format.charAt(pFormat - 1) == 'V'); // Used to check if there is %x or %X - strictWeekNumber = (format.charAt(fp - 1) == 'V' || format.charAt(fp - 1) == 'v'); - tmp = vp + Math.min(2, vend - vp); - intValue = Long.valueOf(value.substring(vp, tmp)); + strictWeekNumber = (format.charAt(pFormat - 1) == 'V' || format.charAt(pFormat - 1) == 'v'); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = Long.valueOf(value.substring(pValue, tmp)); weekNum = intValue; if (weekNum > 53 || (strictWeekNumber && weekNum == 0)) { throw new InvalidFormatException("invalid num of week: " + weekNum); } - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekNumPart; break; // strict week number, must be used with %V or %v case 'x': case 'X': - strictWeekNumberYearType = (format.charAt(fp - 1) == 'X'); - tmp = vp + Math.min(4, vend - vp); - intValue = Long.valueOf(value.substring(vp, tmp)); + strictWeekNumberYearType = (format.charAt(pFormat - 1) == 'X'); + tmp = pValue + Math.min(4, endValue - pValue); + intValue = Long.valueOf(value.substring(pValue, tmp)); strictWeekNumberYear 
= intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekNumPart; break; case 'r': - tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(vp, vend), true); - vp = tmp; - timePartUsed = true; + tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(pValue, endValue), true); + pValue = tmp; + partUsed |= timePart; break; case 'T': - tmp = fromDateFormatStr("%H:%i:%S", value.substring(vp, vend), true); - vp = tmp; - timePartUsed = true; + tmp = fromDateFormatStr("%H:%i:%S", value.substring(pValue, endValue), true); + pValue = tmp; + partUsed |= timePart; break; case '.': - while (vp < vend && Character.toString(value.charAt(vp)).matches("\\p{Punct}")) { - vp++; + while (pValue < endValue && Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) { + pValue++; } break; case '@': - while (vp < vend && Character.isLetter(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isLetter(value.charAt(pValue))) { + pValue++; } break; case '#': - while (vp < vend && Character.isDigit(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isDigit(value.charAt(pValue))) { + pValue++; } break; case '%': // %%, escape the % - if ('%' != value.charAt(vp)) { - throw new InvalidFormatException("invalid char after %: " + value.charAt(vp)); + if ('%' != value.charAt(pValue)) { + throw new InvalidFormatException("invalid char after %: " + value.charAt(pValue)); } - vp++; + pValue++; break; default: - throw new InvalidFormatException("Invalid format pattern: " + f); + throw new InvalidFormatException("Invalid format pattern: " + now); } - } else if (format.charAt(fp) != ' ') { - if (format.charAt(fp) != value.charAt(vp)) { - throw new InvalidFormatException("Invalid char: " + value.charAt(vp) + ", expected: " - + format.charAt(fp)); + } else if (format.charAt(pFormat) != ' ') { + if (format.charAt(pFormat) != value.charAt(pValue)) { + throw new InvalidFormatException("Invalid char: " + value.charAt(pValue) + ", expected: " + + 
format.charAt(pFormat)); } - fp++; - vp++; + pFormat++; + pValue++; } else { - fp++; + pFormat++; } } // continue to iterate pattern if has // to find out if it has time part. - while (fp < fend) { - f = format.charAt(fp); - if (f == '%' && fp + 1 < fend) { - fp++; - f = format.charAt(fp); - fp++; - switch (f) { + while (pFormat < endFormat) { + now = format.charAt(pFormat); + if (now == '%' && pFormat + 1 < endFormat) { + pFormat++; + now = format.charAt(pFormat); + pFormat++; + switch (now) { case 'H': case 'h': case 'I': @@ -1465,25 +1478,29 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr case 'S': case 'p': case 'T': - timePartUsed = true; + partUsed |= timePart; break; default: break; } } else { - fp++; + pFormat++; } } - if (usaTime) { + if (partUsed == 0) { + throw new InvalidFormatException("Nothing for legal Date: " + value); + } + + if (hourSystem12) { if (this.hour > 12 || this.hour < 1) { throw new InvalidFormatException("Invalid hour: " + hour); } - this.hour = (this.hour % 12) + dayPart; + this.hour = (this.hour % 12) + halfDay; } if (hasSubVal) { - return vp; + return pValue; } // Year day @@ -1512,13 +1529,21 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr getDateFromDaynr(days); } + // complete default month/day + if ((partUsed & ~normalDatePart) == 0) { // only date here + if ((partUsed & dayPart) == 0) { + day = 1; + if ((partUsed & monthPart) == 0) { + month = 1; + } + } + } + // Compute timestamp type - // TODO(Gabriel): we still use old version datetime/date and change this to new version when - // we think it's stable enough - if (datePartUsed) { - if (microSecondPartUsed) { + if ((partUsed & datePart) != 0) { // Ymd part only + if ((partUsed & fracPart) != 0) { this.type = Type.DATETIMEV2_WITH_MAX_SCALAR; - } else if (timePartUsed) { + } else if ((partUsed & timePart) != 0) { this.type = ScalarType.getDefaultDateType(Type.DATETIME); } else { this.type = 
ScalarType.getDefaultDateType(Type.DATE); diff --git a/regression-test/data/correctness/test_str_to_date.out b/regression-test/data/correctness/test_str_to_date.out index af6342ecf667c0..633b590ad9803b 100644 --- a/regression-test/data/correctness/test_str_to_date.out +++ b/regression-test/data/correctness/test_str_to_date.out @@ -13,6 +13,18 @@ -- !select4 -- 2020-12-03T11:45:14 +-- !short_nereids_1 -- +2023-01-01 + +-- !short_nereids_2 -- +2023-12-01 + +-- !short_nereids_3 -- +2023-01-01 + +-- !short_nereids_4 -- +2020-02-01 + -- !select5 -- 2019-12-01 yyyy-MM-dd 2019-12-01T00:00 20201203 yyyyMMdd 2020-12-03T00:00 @@ -27,3 +39,15 @@ -- !select8 -- 2020-12-03T11:45:14 +-- !short_legacy_1 -- +2023-01-01 + +-- !short_legacy_2 -- +2023-12-01 + +-- !short_legacy_3 -- +2023-01-01 + +-- !short_legacy_4 -- +2020-02-01 + diff --git a/regression-test/data/datatype_p0/date/test_invalid_date.out b/regression-test/data/datatype_p0/date/test_invalid_date.out deleted file mode 100644 index 80b3c3963d3f42..00000000000000 --- a/regression-test/data/datatype_p0/date/test_invalid_date.out +++ /dev/null @@ -1,7 +0,0 @@ --- This file is automatically generated. 
You should know what you did if you want to edit this --- !sql1 -- -\N - --- !sql2 -- -\N - diff --git a/regression-test/suites/correctness/test_str_to_date.groovy b/regression-test/suites/correctness/test_str_to_date.groovy index 6c26a8db24ba22..300f9cb92167e7 100644 --- a/regression-test/suites/correctness/test_str_to_date.groovy +++ b/regression-test/suites/correctness/test_str_to_date.groovy @@ -36,43 +36,40 @@ suite("test_str_to_date") { sql """ INSERT INTO test_str_to_date_db VALUES(2,'20201203', 'yyyyMMdd');""" sql """ INSERT INTO test_str_to_date_db VALUES(3,'2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');""" - sql """ set enable_nereids_planner=true , enable_fallback_to_original_planner=false;""" - - +sql """ set enable_nereids_planner=true , enable_fallback_to_original_planner=false;""" qt_select1 """ select s1,s2,STR_TO_DATE(s1,s2) from test_str_to_date_db order by id; """ - qt_select2 """ SELECT STR_TO_DATE('2019-12-01', 'yyyy-MM-dd'); """ - qt_select3 """ SELECT STR_TO_DATE('20201203', 'yyyyMMdd'); """ - qt_select4 """ SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss'); """ + qt_short_nereids_1 " select STR_TO_DATE('2023', '%Y') " + qt_short_nereids_2 " select STR_TO_DATE('2023-12', '%Y-%m') " + qt_short_nereids_3 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_nereids_4 " select STR_TO_DATE('2020%2', '%Y%%%m')" - sql """ set enable_nereids_planner=false;""" - +sql """ set enable_nereids_planner=false;""" qt_select5 """ select s1,s2,STR_TO_DATE(s1,s2) from test_str_to_date_db order by id; """ - qt_select6 """ SELECT STR_TO_DATE('2019-12-01', 'yyyy-MM-dd'); """ - qt_select7 """ SELECT STR_TO_DATE('20201203', 'yyyyMMdd'); """ - qt_select8 """ SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss'); """ - - + qt_short_legacy_1 " select STR_TO_DATE('2023', '%Y') " + qt_short_legacy_2 " select STR_TO_DATE('2023-12', '%Y-%m') " + qt_short_legacy_3 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_legacy_4 " select STR_TO_DATE('2020%2', 
'%Y%%%m')" } diff --git a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy b/regression-test/suites/datatype_p0/date/test_invalid_date.groovy deleted file mode 100644 index 6b683d90b62763..00000000000000 --- a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy +++ /dev/null @@ -1,37 +0,0 @@ - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -suite("test_invalid_date") { - def tbName = "test_invalid_date" - sql "DROP TABLE IF EXISTS ${tbName}" - sql """ - CREATE TABLE IF NOT EXISTS ${tbName} ( - c0 int, - c1 char(10), - c2 date, - c3 datev2 - ) - UNIQUE KEY(c0) - DISTRIBUTED BY HASH(c0) BUCKETS 5 properties("replication_num" = "1"); - """ - sql "insert into ${tbName} values(1, 'test1', '2000-01-01', '2000-01-01')" - - qt_sql1 "select str_to_date('202301', '%Y%m');" - qt_sql2 "select str_to_date('202301', '%Y%m') from ${tbName}" - sql "DROP TABLE ${tbName}" -} From e7a9bd476654b49a7e125cf21e40ad9556885eb0 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Mon, 5 Feb 2024 00:10:07 +0800 Subject: [PATCH 2/2] fix case --- .../nereids_function_p0/scalar_function/U.out | 100 +++++++++--------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/regression-test/data/nereids_function_p0/scalar_function/U.out b/regression-test/data/nereids_function_p0/scalar_function/U.out index fa8adc5f85f5aa..36700e53a91109 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/U.out +++ b/regression-test/data/nereids_function_p0/scalar_function/U.out @@ -174,62 +174,62 @@ 1331481600 -- !sql_unix_timestamp_Varchar_Varchar -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_Varchar_Varchar_notnull -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_String_String -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_unix_timestamp_String_String_notnull -- -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N -- !sql_upper_Varchar -- NULL