diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 8df03496c11fbe..e0f785b0ef6f92 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -94,13 +94,13 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum return ColumnNullable::create(src_not_nullable, result_null_map_column); } -bool get_null_presence(const Block& block, const ColumnNumbers& args) { +bool have_null_column(const Block& block, const ColumnNumbers& args) { return std::ranges::any_of(args, [&block](const auto& elem) { return block.get_by_position(elem).type->is_nullable(); }); } -bool get_null_presence(const ColumnsWithTypeAndName& args) { +bool have_null_column(const ColumnsWithTypeAndName& args) { return std::ranges::any_of(args, [](const auto& elem) { return elem.type->is_nullable(); }); } @@ -202,20 +202,21 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( return Status::OK(); } - if (get_null_presence(block, args)) { + if (have_null_column(block, args)) { bool need_to_default = need_replace_null_data_to_default(); if (context) { need_to_default &= context->check_overflow_for_decimal(); } + // extract nested column from nulls ColumnNumbers new_args; for (auto arg : args) { new_args.push_back(block.columns()); block.insert(block.get_by_position(arg).get_nested(need_to_default)); DCHECK(!block.get_by_position(new_args.back()).column->is_nullable()); } - RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, block.rows(), dry_run)); + // after run with nested, wrap them in null. 
block.get_by_position(result).column = wrap_in_nullable( block.get_by_position(result).column, block, args, result, input_rows_count); @@ -267,7 +268,7 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality( check_number_of_arguments(arguments.size()); if (!arguments.empty() && use_default_implementation_for_nulls()) { - if (get_null_presence(arguments)) { + if (have_null_column(arguments)) { ColumnNumbers numbers(arguments.size()); std::iota(numbers.begin(), numbers.end(), 0); auto [nested_block, _] = diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 73f49a237ae6f1..df5d8d60942e30 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -65,8 +65,8 @@ concept HasGetVariadicArgumentTypesImpl = requires(T t) { { t.get_variadic_argument_types_impl() } -> std::same_as; }; -bool get_null_presence(const Block& block, const ColumnNumbers& args); -bool get_null_presence(const ColumnsWithTypeAndName& args); +bool have_null_column(const Block& block, const ColumnNumbers& args); +bool have_null_column(const ColumnsWithTypeAndName& args); /// The simplest executable object. /// Motivation: @@ -288,9 +288,11 @@ class FunctionBuilderImpl : public IFunctionBuilder { bool is_variadic() const override { return false; } - /// Default implementation. Will check only in non-variadic case. + // Default implementation. Will check only in non-variadic case. void check_number_of_arguments(size_t number_of_arguments) const override; - + // the return type should be same with what FE plans. 
+ // it returns: `get_return_type_impl` if `use_default_implementation_for_nulls` = false + // `get_return_type_impl` wrapped in NULL if `use_default_implementation_for_nulls` = true and input has NULL DataTypePtr get_return_type(const ColumnsWithTypeAndName& arguments) const; DataTypes get_variadic_argument_types() const override { @@ -300,7 +302,9 @@ class FunctionBuilderImpl : public IFunctionBuilder { ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; } protected: - /// Get the result type by argument type. If the function does not apply to these arguments, throw an exception. + // Get the result type by argument type. If the function does not apply to these arguments, throw an exception. + // the get_return_type_impl and its overrides should only return the nested type if `use_default_implementation_for_nulls` is true. + // whether to wrap in nullable type will be automatically decided. virtual DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const { DataTypes data_types(arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; @@ -317,7 +321,7 @@ class FunctionBuilderImpl : public IFunctionBuilder { * if some of arguments are Nullable(Nothing) then don't call get_return_type(), call build_impl() with return_type = Nullable(Nothing), * if some of arguments are Nullable, then: * - Nullable types are substituted with nested types for get_return_type() function - * - wrap get_return_type() result in Nullable type and pass to build_impl + * - WRAP get_return_type() RESULT IN NULLABLE type and pass to build_impl * * Otherwise build returns build_impl(arguments, get_return_type(arguments)); */ diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 88147ec6cd9871..6886d53004b2ef 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -700,7 +700,7 @@ Status 
execute_bitmap_op_count_null_to_zero( size_t input_rows_count, const std::function& exec_impl_func) { - if (get_null_presence(block, arguments)) { + if (have_null_column(block, arguments)) { auto [temporary_block, new_args, new_result] = create_block_with_nested_columns(block, arguments, result); RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result, diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp index 315ca52d1bc423..928fb1c076780b 100644 --- a/be/src/vec/functions/nullif.cpp +++ b/be/src/vec/functions/nullif.cpp @@ -75,7 +75,7 @@ class FunctionNullIf : public IFunction { } if (!arguments.empty()) { - if (get_null_presence(arguments)) { + if (have_null_column(arguments)) { return make_nullable(std::make_shared()); } } diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 58c6c25f9ba9bc..334779f93901b6 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -19,20 +19,18 @@ #include #include -#include #include -#include -#include -#include + +#include +#include +#include +#include // IWYU pragma: no_include #include -#include #include // IWYU pragma: keep // IWYU pragma: no_include #include -#include -#include -#include +#include #include #include "common/compiler_util.h" @@ -393,10 +391,7 @@ bool VecDateTimeValue::from_time_int64(int64_t value) { return false; } _second = value % 100; - if (_second > TIME_MAX_SECOND) { - return false; - } - return true; + return _second <= TIME_MAX_SECOND; } char* VecDateTimeValue::append_date_buffer(char* to) const { @@ -686,7 +681,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c } char buf[64]; char* cursor = buf; - char* pos = NULL; + char* pos = nullptr; const char* ptr = format; const char* end = format + len; char ch = '\0'; @@ -1145,12 +1140,9 @@ static bool str_to_int64(const char* ptr, const char** endptr, int64_t* ret) { uint64_t value_3 = 0; 
// Check overflow. - if (value_1 > cutoff_1 || - (value_1 == cutoff_1 && - (value_2 > cutoff_2 || (value_2 == cutoff_2 && value_3 > cutoff_3)))) { - return false; - } - return true; + return value_1 <= cutoff_1 && + (value_1 != cutoff_1 || + (value_2 <= cutoff_2 && (value_2 != cutoff_2 || value_3 <= cutoff_3))); } static int min(int a, int b) { @@ -1161,7 +1153,7 @@ static int find_in_lib(const char* lib[], const char* str, const char* end) { int pos = 0; int find_count = 0; int find_pos = 0; - for (; lib[pos] != NULL; ++pos) { + for (; lib[pos] != nullptr; ++pos) { const char* i = str; const char* j = lib[pos]; while (i < end && *j) { @@ -1199,26 +1191,41 @@ static int check_word(const char* lib[], const char* str, const char* end, const // change this method should also change that. bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, const char* value, int value_len, const char** sub_val_end) { + if (value_len <= 0) [[unlikely]] { + return false; + } const char* ptr = format; const char* end = format + format_len; const char* val = value; const char* val_end = value + value_len; - bool date_part_used = false; - bool time_part_used = false; - bool frac_part_used = false; - bool already_set_time_part = false; - - int day_part = 0; + bool already_set_time_part = false; // skip time part in the end's setting. 
+ + uint32_t part_used = 0; + constexpr int YEAR_PART = 1U << 0; + constexpr int MONTH_PART = 1U << 1; + constexpr int DAY_PART = 1U << 2; + constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART; + constexpr int WEEKDAY_PART = 1U << 3; + constexpr int YEARDAY_PART = 1U << 4; + constexpr int WEEK_NUM_PART = 1U << 5; + constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | WEEK_NUM_PART; + [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | SPECIAL_DATE_PART; + constexpr int HOUR_PART = 1U << 6; + constexpr int MINUTE_PART = 1U << 7; + constexpr int SECOND_PART = 1U << 8; + constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART; + + int half_day = 0; // 0 for am/none, 12 for pm. int weekday = -1; int yearday = -1; - int week_num = -1; + int week_num = -1; // week idx in one year bool strict_week_number = false; bool sunday_first = false; bool strict_week_number_year_type = false; int strict_week_number_year = -1; - bool usa_time = false; + bool hour_system_12 = false; auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { @@ -1231,7 +1238,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } // Check switch if (*ptr == '%' && ptr + 1 < end) { - const char* tmp = NULL; + const char* tmp = nullptr; int64_t int_value = 0; ptr++; switch (*ptr++) { @@ -1245,7 +1252,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, int_value += int_value >= 70 ? 
1900 : 2000; year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; case 'Y': // Year, numeric, four digits @@ -1258,7 +1265,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; // Month case 'm': @@ -1269,7 +1276,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } month = int_value; val = tmp; - date_part_used = true; + part_used |= MONTH_PART; break; case 'M': int_value = check_word(const_cast(s_month_name), val, val_end, &val); @@ -1277,6 +1284,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } month = int_value; + part_used |= MONTH_PART; break; case 'b': int_value = check_word(s_ab_month_name, val, val_end, &val); @@ -1284,6 +1292,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } month = int_value; + part_used |= MONTH_PART; break; // Day case 'd': @@ -1294,7 +1303,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } day = int_value; val = tmp; - date_part_used = true; + part_used |= DAY_PART; break; case 'D': tmp = val + min(2, val_end - val); @@ -1303,13 +1312,14 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } day = int_value; val = tmp + min(2, val_end - tmp); - date_part_used = true; + part_used |= DAY_PART; break; // Hour case 'h': case 'I': case 'l': - usa_time = true; + hour_system_12 = true; + part_used |= HOUR_PART; // Fall through case 'k': case 'H': @@ -1319,7 +1329,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } hour = int_value; val = tmp; - time_part_used = true; + part_used |= HOUR_PART; break; // Minute case 'i': @@ -1329,7 +1339,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } minute = int_value; val = tmp; - 
time_part_used = true; + part_used |= MINUTE_PART; break; // Second case 's': @@ -1340,7 +1350,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } second = int_value; val = tmp; - time_part_used = true; + part_used |= SECOND_PART; break; // Micro second case 'f': @@ -1351,16 +1361,15 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } val = tmp; break; - // AM/PM + // AM/PM, only meaningful for 12-hour system. case 'p': - if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) { + if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !hour_system_12) { return false; } if (toupper(*val) == 'P') { // PM - day_part = 12; + half_day = 12; } - time_part_used = true; val += 2; break; // Weekday @@ -1371,7 +1380,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'a': int_value = check_word(s_ab_day_name, val, val_end, &val); @@ -1380,7 +1389,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'w': tmp = val + min(1, val_end - val); @@ -1395,7 +1404,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } weekday = int_value; val = tmp; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'j': tmp = val + min(3, val_end - val); @@ -1404,7 +1413,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } yearday = int_value; val = tmp; - date_part_used = true; + part_used |= YEARDAY_PART; break; case 'u': case 'v': @@ -1422,7 +1431,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, return false; } val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; // strict week number, must be used with %V or %v case 
'x': @@ -1434,7 +1443,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } strict_week_number_year = int_value; val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; case 'r': { VecDateTimeValue tmp_val; @@ -1445,7 +1454,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, this->_minute = tmp_val._minute; this->_second = tmp_val._second; val = tmp; - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; break; } @@ -1457,7 +1466,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, this->_hour = tmp_val._hour; this->_minute = tmp_val._minute; this->_second = tmp_val._second; - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; val = tmp; break; @@ -1497,8 +1506,7 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } } - // continue to iterate pattern if has - // to find out if it has time part. + // for compatible with mysql, like something have %H:%i:%s format but no relative content... 
while (ptr < end) { if (*ptr == '%' && ptr + 1 < end) { ptr++; @@ -1511,10 +1519,11 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, case 'l': case 'r': case 's': + case 'f': case 'S': case 'p': case 'T': - time_part_used = true; + part_used |= TIME_PART; break; default: break; @@ -1524,33 +1533,29 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, } } - if (usa_time) { + if (!part_used) { + return false; + } + + if (hour_system_12) { if (hour > 12 || hour < 1) { return false; } - hour = (hour % 12) + day_part; + hour = (hour % 12) + half_day; } if (sub_val_end) { *sub_val_end = val; } // Compute timestamp type - if (frac_part_used) { - if (date_part_used) { + if (part_used & DATE_PART) { + if (part_used & TIME_PART) { _type = TIME_DATETIME; } else { - _type = TIME_TIME; + _type = TIME_DATE; } } else { - if (date_part_used) { - if (time_part_used) { - _type = TIME_DATETIME; - } else { - _type = TIME_DATE; - } - } else { - _type = TIME_TIME; - } + _type = TIME_TIME; } _neg = false; @@ -1592,12 +1597,13 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, if (already_set_date_part && already_set_time_part) { return true; } - // for two special date cases, complete default month/day - if (!time_part_used && year > 0) { - if (std::string_view {format, end} == "%Y") { - month = day = 1; - } else if (std::string_view {format, end} == "%Y-%m") { + // complete default month/day + if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only + if (!(part_used & DAY_PART)) { day = 1; + if (!(part_used & MONTH_PART)) { + month = 1; + } } } @@ -1614,7 +1620,9 @@ bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, template bool VecDateTimeValue::date_add_interval(const TimeInterval& interval) { if constexpr (need_check) { - if (!is_valid_date()) return false; + if (!is_valid_date()) { + return false; + } } int sign = interval.is_neg ? 
-1 : 1; @@ -1771,7 +1779,7 @@ void VecDateTimeValue::from_unixtime(int64_t timestamp, const cctz::time_zone& c const char* VecDateTimeValue::month_name() const { if (_month < 1 || _month > 12) { - return NULL; + return nullptr; } return s_month_name[_month]; } @@ -1779,14 +1787,14 @@ const char* VecDateTimeValue::month_name() const { const char* VecDateTimeValue::day_name() const { int day = weekday(); if (day < 0 || day >= 7) { - return NULL; + return nullptr; } return s_day_name[day]; } VecDateTimeValue VecDateTimeValue::local_time() { VecDateTimeValue value; - value.from_unixtime(time(NULL), TimezoneUtils::default_time_zone); + value.from_unixtime(time(nullptr), TimezoneUtils::default_time_zone); return value; } @@ -2170,17 +2178,33 @@ void DateV2Value::set_zero() { template bool DateV2Value::from_date_format_str(const char* format, int format_len, const char* value, int value_len, const char** sub_val_end) { + if (value_len <= 0) [[unlikely]] { + return false; + } const char* ptr = format; const char* end = format + format_len; const char* val = value; const char* val_end = value + value_len; - bool date_part_used = false; - bool time_part_used = false; - bool frac_part_used = false; - bool already_set_time_part = false; - - int day_part = 0; + bool already_set_time_part = false; // skip time part in the end's setting. 
+ + uint32_t part_used = 0; + constexpr int YEAR_PART = 1U << 0; + constexpr int MONTH_PART = 1U << 1; + constexpr int DAY_PART = 1U << 2; + constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART; + constexpr int WEEKDAY_PART = 1U << 3; + constexpr int YEARDAY_PART = 1U << 4; + constexpr int WEEK_NUM_PART = 1U << 5; + constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | WEEK_NUM_PART; + [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | SPECIAL_DATE_PART; + constexpr int HOUR_PART = 1U << 6; + constexpr int MINUTE_PART = 1U << 7; + constexpr int SECOND_PART = 1U << 8; + constexpr int FRAC_PART = 1U << 9; + constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART | FRAC_PART; + + int half_day = 0; // 0 for am/none, 12 for pm. int weekday = -1; int yearday = -1; int week_num = -1; @@ -2189,7 +2213,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co bool sunday_first = false; bool strict_week_number_year_type = false; int strict_week_number_year = -1; - bool usa_time = false; + bool hour_system_12 = false; auto [year, month, day, hour, minute, second, microsecond] = std::tuple {0, 0, 0, 0, 0, 0, 0}; while (ptr < end && val < val_end) { @@ -2202,7 +2226,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } // Check switch if (*ptr == '%' && ptr + 1 < end) { - const char* tmp = NULL; + const char* tmp = nullptr; int64_t int_value = 0; ptr++; switch (*ptr++) { @@ -2216,7 +2240,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co int_value += int_value >= 70 ? 
1900 : 2000; year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; case 'Y': // Year, numeric, four digits @@ -2229,7 +2253,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } year = int_value; val = tmp; - date_part_used = true; + part_used |= YEAR_PART; break; // Month case 'm': @@ -2240,7 +2264,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } month = int_value; val = tmp; - date_part_used = true; + part_used |= MONTH_PART; break; case 'M': int_value = check_word(const_cast(s_month_name), val, val_end, &val); @@ -2248,6 +2272,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } month = int_value; + part_used |= MONTH_PART; break; case 'b': int_value = check_word(s_ab_month_name, val, val_end, &val); @@ -2255,6 +2280,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } month = int_value; + part_used |= MONTH_PART; break; // Day case 'd': @@ -2265,7 +2291,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } day = int_value; val = tmp; - date_part_used = true; + part_used |= DAY_PART; break; case 'D': tmp = val + min(2, val_end - val); @@ -2274,13 +2300,14 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } day = int_value; val = tmp + min(2, val_end - tmp); - date_part_used = true; + part_used |= DAY_PART; break; // Hour case 'h': case 'I': case 'l': - usa_time = true; + hour_system_12 = true; + part_used |= HOUR_PART; // Fall through case 'k': case 'H': @@ -2290,7 +2317,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } hour = int_value; val = tmp; - time_part_used = true; + part_used |= HOUR_PART; break; // Minute case 'i': @@ -2300,7 +2327,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } minute = int_value; val = tmp; - time_part_used = 
true; + part_used |= MINUTE_PART; break; // Second case 's': @@ -2311,7 +2338,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } second = int_value; val = tmp; - time_part_used = true; + part_used |= SECOND_PART; break; // Micro second case 'f': @@ -2333,21 +2360,19 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } if constexpr (is_datetime) { microsecond = int_value * int_exp10(6 - min(6, tmp - val)); - frac_part_used = true; + part_used |= FRAC_PART; } val = tmp; - time_part_used = true; break; // AM/PM case 'p': - if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) { + if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !hour_system_12) { return false; } if (toupper(*val) == 'P') { // PM - day_part = 12; + half_day = 12; } - time_part_used = true; val += 2; break; // Weekday @@ -2358,7 +2383,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'a': int_value = check_word(s_ab_day_name, val, val_end, &val); @@ -2367,7 +2392,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } int_value++; weekday = int_value; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'w': tmp = val + min(1, val_end - val); @@ -2382,7 +2407,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } weekday = int_value; val = tmp; - date_part_used = true; + part_used |= WEEKDAY_PART; break; case 'j': tmp = val + min(3, val_end - val); @@ -2391,7 +2416,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } yearday = int_value; val = tmp; - date_part_used = true; + part_used |= YEARDAY_PART; break; case 'u': case 'v': @@ -2409,7 +2434,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co return false; } val = tmp; - date_part_used = true; + 
part_used |= WEEK_NUM_PART; break; // strict week number, must be used with %V or %v case 'x': @@ -2421,7 +2446,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } strict_week_number_year = int_value; val = tmp; - date_part_used = true; + part_used |= WEEK_NUM_PART; break; case 'r': { if constexpr (is_datetime) { @@ -2434,7 +2459,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co this->date_v2_value_.minute_ = tmp_val.minute(); this->date_v2_value_.second_ = tmp_val.second(); val = tmp; - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; break; } else { @@ -2450,7 +2475,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co this->date_v2_value_.hour_ = tmp_val.hour(); this->date_v2_value_.minute_ = tmp_val.minute(); this->date_v2_value_.second_ = tmp_val.second(); - time_part_used = true; + part_used |= TIME_PART; already_set_time_part = true; val = tmp; break; @@ -2493,12 +2518,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } } - // ptr == format means input value string is "", not do parse format failed here - if (ptr == format) { - return false; - } - // continue to iterate pattern if has - // to find out if it has time part. + // for compatible with mysql, like something have %H:%i:%s format but no relative content... 
while (ptr < end) { if (*ptr == '%' && ptr + 1 < end) { ptr++; @@ -2515,7 +2535,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co case 'S': case 'p': case 'T': - time_part_used = true; + part_used |= TIME_PART; break; default: break; @@ -2525,25 +2545,27 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } } - if (usa_time) { + if (!part_used) { + return false; + } + + if (hour_system_12) { if (hour > 12 || hour < 1) { return false; } - hour = (hour % 12) + day_part; + hour = (hour % 12) + half_day; } if (sub_val_end) { *sub_val_end = val; } // Compute timestamp type - if (frac_part_used) { + if (part_used & FRAC_PART) { if constexpr (!is_datetime) { - LOG(WARNING) << "Microsecond is not allowed for date type!"; return false; } - } else if (time_part_used) { + } else if (part_used & TIME_PART) { if constexpr (!is_datetime) { - LOG(WARNING) << "Time part is not allowed for date type!"; return false; } } @@ -2582,7 +2604,9 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co // so we only need to set date part // 3. 
if both are true, means all part of date_time be set, no need check_range_and_set_time bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0); - if (already_set_date_part && already_set_time_part) return true; + if (already_set_date_part && already_set_time_part) { + return true; + } if (already_set_date_part) { if constexpr (is_datetime) { return check_range_and_set_time(date_v2_value_.year_, date_v2_value_.month_, @@ -2592,13 +2616,13 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co date_v2_value_.day_, 0, 0, 0, 0); } } - - // for two special date cases, complete default month/day - if (!time_part_used && year > 0) { - if (std::string_view {format, end} == "%Y") { - month = day = 1; - } else if (std::string_view {format, end} == "%Y-%m") { + // complete default month/day + if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only + if (!(part_used & DAY_PART)) { day = 1; + if (!(part_used & MONTH_PART)) { + month = 1; + } } } @@ -2613,7 +2637,7 @@ bool DateV2Value::from_date_format_str(const char* format, int format_len, co } if constexpr (is_datetime) { return check_range_and_set_time(year, month, day, hour, minute, second, microsecond, - time_part_used && !date_part_used); + !(part_used & ~TIME_PART)); } else { return check_range_and_set_time(year, month, day, 0, 0, 0, 0); } diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index ad3619aba53255..7b138b2bc3a1f1 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -206,7 +206,7 @@ static constexpr uint32_t DATEV2_YEAR_WIDTH = 23; static constexpr uint32_t DATETIMEV2_YEAR_WIDTH = 18; static constexpr uint32_t DATETIMEV2_MONTH_WIDTH = 4; -static RE2 time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$"); +static RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); uint8_t mysql_week_mode(uint32_t mode); diff --git a/be/test/vec/function/function_array_index_test.cpp 
b/be/test/vec/function/function_array_index_test.cpp index 7f496dc1ccee09..92c7901bbfe112 100644 --- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -114,10 +114,8 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; - Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("", false), - str_to_date_time("2022-07-08", false)}; + Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("2022-07-08", false)}; DataSet data_set = {{{vec, std::string("2022-01-02")}, UInt8(1)}, - {{vec, std::string("")}, UInt8(1)}, {{vec, std::string("2022-01-03")}, UInt8(0)}, {{Null(), std::string("2022-01-04")}, Null()}, {{empty_arr, std::string("2022-01-02")}, UInt8(0)}}; @@ -129,10 +127,9 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; - Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""), + Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time("2022-07-08 00:00:00")}; DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, UInt8(1)}, - {{vec, std::string("")}, UInt8(1)}, {{vec, std::string("2022-01-03 00:00:00")}, UInt8(0)}, {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, {{empty_arr, std::string("2022-01-02 00:00:00")}, UInt8(0)}}; @@ -217,10 +214,8 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, TypeIndex::Date}; - Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("", false), - str_to_date_time("2022-07-08", false)}; + Array vec = {str_to_date_time("2022-01-02", false), str_to_date_time("2022-07-08", false)}; DataSet data_set = {{{vec, std::string("2022-01-02")}, Int64(1)}, - {{vec, std::string("")}, Int64(2)}, {{vec, std::string("2022-01-03")}, Int64(0)}, {{Null(), 
std::string("2022-01-04")}, Null()}, {{empty_arr, std::string("2022-01-02")}, Int64(0)}}; @@ -232,10 +227,9 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, TypeIndex::DateTime}; - Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time(""), + Array vec = {str_to_date_time("2022-01-02 00:00:00"), str_to_date_time("2022-07-08 00:00:00")}; DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, Int64(1)}, - {{vec, std::string("")}, Int64(2)}, {{vec, std::string("2022-01-03 00:00:00")}, Int64(0)}, {{Null(), std::string("2022-01-04 00:00:00")}, Null()}, {{empty_arr, std::string("2022-01-02 00:00:00")}, Int64(0)}}; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index f488690f51dbdc..44cb954e3c377b 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -52,18 +52,13 @@ #include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" -namespace doris { -namespace vectorized { +namespace doris::vectorized { + class DataTypeJsonb; class DataTypeTime; class TableFunction; template class DataTypeDecimal; -} // namespace vectorized -} // namespace doris - -namespace doris::vectorized { - using InputDataSet = std::vector>; // without result using CellSet = std::vector; using Expect = AnyType; @@ -71,6 +66,7 @@ using Row = std::pair; using DataSet = std::vector; using InputTypeSet = std::vector; +// FIXME: should use exception or expected to deal with null values. int64_t str_to_date_time(std::string datetime_str, bool data_time = true); uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format); uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_format); @@ -300,7 +296,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty if constexpr (std::is_same_v) { const auto& expect_data = 
any_cast(data_set[i].second); auto s = column->get_data_at(i); - if (expect_data.size() == 0) { + if (expect_data.empty()) { // zero size result means invalid EXPECT_EQ(0, s.size) << " invalid result size should be 0 at row " << i; } else { @@ -321,7 +317,8 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty } else if constexpr (std::is_same_v) { const ColumnBitmap* bitmap_col = nullptr; if constexpr (nullable) { - auto nullable_column = assert_cast(column.get()); + const auto* nullable_column = + assert_cast(column.get()); bitmap_col = assert_cast( nullable_column->get_nested_column_ptr().get()); } else { @@ -344,7 +341,9 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty if constexpr (nullable) { bool is_null = data_set[i].second.type() == &typeid(Null); EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i; - if (!is_null) check_column_data(); + if (!is_null) { + check_column_data(); + } } else { check_column_data(); } diff --git a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md index ab86ec15c2befd..54f2acd7597656 100644 --- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md +++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md @@ -70,23 +70,23 @@ The formats available are: % m | month, numerical value (00-12) -%p | AM or PM +% p | AM or PM, only available with a 12-hour clock -% R | Time, 12 - hour (hh: mm: SS AM or PM) +% r | Time, 12-hour (hh:mm:ss), with or without an AM/PM marker % S | seconds (00-59) % s | seconds (00-59) -% T | Time, 24 - hour (hh: mm: ss) +% T | Time, 24-hour (hh:mm:ss) % U | Week (00-53) Sunday is the first day of the week -% U | Week (00 - 53) Monday is the first day of the week +% u | Week (00-53) Monday is the first day of the week % V | Week (01-53) Sunday is the first day of the week, and% X is used. 
-% v | Week (01 - 53) Monday is the first day of the week, and% x is used +% v | Week (01-53) Monday is the first day of the week, and% x is used % W | Sunday diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md index 69a6315d729f74..a5fb25f29861dc 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md @@ -70,9 +70,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。 %m | 月,数值(00-12) -%p | AM 或 PM +%p | AM 或 PM,仅在采用12小时制时可用。 -%r | 时间,12-小时(hh:mm:ss AM 或 PM) +%r | 时间,12-小时(hh:mm:ss),可以包含或不包含AM/PM。 %S | 秒(00-59) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java index 2f7f183b1ad27a..37171b70facd18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java @@ -1203,16 +1203,28 @@ public int hashCode() { // this method is exaclty same as from_date_format_str() in be/src/runtime/datetime_value.cpp // change this method should also change that. 
public int fromDateFormatStr(String format, String value, boolean hasSubVal) throws InvalidFormatException { - int fp = 0; // pointer to the current format string - int fend = format.length(); // end of format string - int vp = 0; // pointer to the date string value - int vend = value.length(); // end of date string value - - boolean datePartUsed = false; - boolean timePartUsed = false; - boolean microSecondPartUsed = false; - - int dayPart = 0; + int pFormat = 0; // pointer to the current format string + int endFormat = format.length(); // end of format string + int pValue = 0; // pointer to the date string value + int endValue = value.length(); // end of date string value + + int partUsed = 0; + final int yearPart = 1 << 0; + final int monthPart = 1 << 1; + final int dayPart = 1 << 2; + final int weekdayPart = 1 << 3; + final int yeardayPart = 1 << 4; + final int weekNumPart = 1 << 5; + final int normalDatePart = yearPart | monthPart | dayPart; + final int specialDatePart = weekdayPart | yeardayPart | weekNumPart; + final int datePart = normalDatePart | specialDatePart; + final int hourPart = 1 << 6; + final int minutePart = 1 << 7; + final int secondPart = 1 << 8; + final int fracPart = 1 << 9; + final int timePart = hourPart | minutePart | secondPart | fracPart; + + int halfDay = 0; // 0 for am/none, 12 for pm. 
long weekday = -1; long yearday = -1; long weekNum = -1; @@ -1221,169 +1233,170 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr boolean sundayFirst = false; boolean strictWeekNumberYearType = false; long strictWeekNumberYear = -1; - boolean usaTime = false; + boolean hourSystem12 = false; // hour in [0..12] and with am/pm - char f; - while (fp < fend && vp < vend) { + char now; + while (pFormat < endFormat && pValue < endValue) { // Skip space character - while (vp < vend && Character.isSpaceChar(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isSpaceChar(value.charAt(pValue))) { + pValue++; } - if (vp >= vend) { + if (pValue >= endValue) { break; } // Check switch - f = format.charAt(fp); - if (f == '%' && fp + 1 < fend) { + now = format.charAt(pFormat); + if (now == '%' && pFormat + 1 < endFormat) { int tmp = 0; long intValue = 0; - fp++; - f = format.charAt(fp); - fp++; - switch (f) { + pFormat++; + now = format.charAt(pFormat); + pFormat++; + switch (now) { // Year case 'y': // Year, numeric (two digits) - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); intValue += intValue >= 70 ? 1900 : 2000; this.year = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yearPart; break; case 'Y': // Year, numeric, four digits - tmp = vp + Math.min(4, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); - if (tmp - vp <= 2) { + tmp = pValue + Math.min(4, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); + if (tmp - pValue <= 2) { intValue += intValue >= 70 ? 
1900 : 2000; } this.year = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yearPart; break; // Month case 'm': case 'c': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.month = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= monthPart; break; case 'M': { - int nextPos = findWord(value, vp); - intValue = checkWord(MONTH_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(MONTH_NAME_DICT, value.substring(pValue, nextPos)); this.month = intValue; - vp = nextPos; + pValue = nextPos; + partUsed |= monthPart; break; } case 'b': { - int nextPos = findWord(value, vp); - intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(MONTH_ABBR_NAME_DICT, value.substring(pValue, nextPos)); this.month = intValue; - vp = nextPos; + pValue = nextPos; + partUsed |= monthPart; break; } // Day case 'd': case 'e': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.day = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= dayPart; break; case 'D': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.day = intValue; - vp = tmp + Math.min(2, vend - tmp); - datePartUsed = true; + pValue = tmp + Math.min(2, endValue - tmp); + partUsed |= dayPart; break; // Hour case 'h': case 'I': case 'l': - usaTime = true; + hourSystem12 = true; + partUsed |= hourPart; case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through case 'H': - tmp = findNumber(value, vp, 2); - 
intValue = strToLong(value.substring(vp, tmp)); + tmp = findNumber(value, pValue, 2); + intValue = strToLong(value.substring(pValue, tmp)); this.hour = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= hourPart; break; // Minute case 'i': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.minute = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= minutePart; break; // Second case 's': case 'S': - tmp = vp + Math.min(2, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); this.second = intValue; - vp = tmp; - timePartUsed = true; + pValue = tmp; + partUsed |= secondPart; break; // Micro second case 'f': - // FIXME: fix same with BE - tmp = vp; + tmp = pValue; // when there's still something to the end, fix the scale of ms. 
- while (tmp < vend && Character.isDigit(value.charAt(tmp))) { + while (tmp < endValue && Character.isDigit(value.charAt(tmp))) { tmp += 1; } - if (tmp - vp > 6) { - int tmp2 = vp + 6; - intValue = strToLong(value.substring(vp, tmp2)); + if (tmp - pValue > 6) { + int tmp2 = pValue + 6; + intValue = strToLong(value.substring(pValue, tmp2)); } else { - intValue = strToLong(value.substring(vp, tmp)); + intValue = strToLong(value.substring(pValue, tmp)); } - this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - vp))); - timePartUsed = true; - microSecondPartUsed = true; - vp = tmp; + this.microsecond = (long) (intValue * Math.pow(10, 6 - Math.min(6, tmp - pValue))); + partUsed |= fracPart; + pValue = tmp; break; // AM/PM case 'p': - if ((vend - vp) < 2 || Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) { + if ((endValue - pValue) < 2 || Character.toUpperCase(value.charAt(pValue + 1)) != 'M' + || !hourSystem12) { throw new InvalidFormatException("Invalid %p format"); } - if (Character.toUpperCase(value.charAt(vp)) == 'P') { + if (Character.toUpperCase(value.charAt(pValue)) == 'P') { // PM - dayPart = 12; + halfDay = 12; } - timePartUsed = true; - vp += 2; + pValue += 2; break; // Weekday case 'W': { - int nextPos = findWord(value, vp); - intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos)); intValue++; weekday = intValue; - datePartUsed = true; + partUsed |= weekdayPart; break; } case 'a': { - int nextPos = findWord(value, vp); - intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(vp, nextPos)); + int nextPos = findWord(value, pValue); + intValue = checkWord(WEEK_DAY_NAME_DICT, value.substring(pValue, nextPos)); intValue++; weekday = intValue; - datePartUsed = true; + partUsed |= weekdayPart; break; } case 'w': - tmp = vp + Math.min(1, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); 
+ tmp = pValue + Math.min(1, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); if (intValue >= 7) { throw new InvalidFormatException("invalid day of week: " + intValue); } @@ -1391,97 +1404,97 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr intValue = 7; } weekday = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekdayPart; break; case 'j': - tmp = vp + Math.min(3, vend - vp); - intValue = strToLong(value.substring(vp, tmp)); + tmp = pValue + Math.min(3, endValue - pValue); + intValue = strToLong(value.substring(pValue, tmp)); yearday = intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= yeardayPart; break; case 'u': case 'v': case 'U': case 'V': - sundayFirst = (format.charAt(fp - 1) == 'U' || format.charAt(fp - 1) == 'V'); + sundayFirst = (format.charAt(pFormat - 1) == 'U' || format.charAt(pFormat - 1) == 'V'); // Used to check if there is %x or %X - strictWeekNumber = (format.charAt(fp - 1) == 'V' || format.charAt(fp - 1) == 'v'); - tmp = vp + Math.min(2, vend - vp); - intValue = Long.valueOf(value.substring(vp, tmp)); + strictWeekNumber = (format.charAt(pFormat - 1) == 'V' || format.charAt(pFormat - 1) == 'v'); + tmp = pValue + Math.min(2, endValue - pValue); + intValue = Long.valueOf(value.substring(pValue, tmp)); weekNum = intValue; if (weekNum > 53 || (strictWeekNumber && weekNum == 0)) { throw new InvalidFormatException("invalid num of week: " + weekNum); } - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekNumPart; break; // strict week number, must be used with %V or %v case 'x': case 'X': - strictWeekNumberYearType = (format.charAt(fp - 1) == 'X'); - tmp = vp + Math.min(4, vend - vp); - intValue = Long.valueOf(value.substring(vp, tmp)); + strictWeekNumberYearType = (format.charAt(pFormat - 1) == 'X'); + tmp = pValue + Math.min(4, endValue - pValue); + intValue = Long.valueOf(value.substring(pValue, tmp)); strictWeekNumberYear 
= intValue; - vp = tmp; - datePartUsed = true; + pValue = tmp; + partUsed |= weekNumPart; break; case 'r': - tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(vp, vend), true); - vp = tmp; - timePartUsed = true; + tmp = fromDateFormatStr("%I:%i:%S %p", value.substring(pValue, endValue), true); + pValue = tmp; + partUsed |= timePart; break; case 'T': - tmp = fromDateFormatStr("%H:%i:%S", value.substring(vp, vend), true); - vp = tmp; - timePartUsed = true; + tmp = fromDateFormatStr("%H:%i:%S", value.substring(pValue, endValue), true); + pValue = tmp; + partUsed |= timePart; break; case '.': - while (vp < vend && Character.toString(value.charAt(vp)).matches("\\p{Punct}")) { - vp++; + while (pValue < endValue && Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) { + pValue++; } break; case '@': - while (vp < vend && Character.isLetter(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isLetter(value.charAt(pValue))) { + pValue++; } break; case '#': - while (vp < vend && Character.isDigit(value.charAt(vp))) { - vp++; + while (pValue < endValue && Character.isDigit(value.charAt(pValue))) { + pValue++; } break; case '%': // %%, escape the % - if ('%' != value.charAt(vp)) { - throw new InvalidFormatException("invalid char after %: " + value.charAt(vp)); + if ('%' != value.charAt(pValue)) { + throw new InvalidFormatException("invalid char after %: " + value.charAt(pValue)); } - vp++; + pValue++; break; default: - throw new InvalidFormatException("Invalid format pattern: " + f); + throw new InvalidFormatException("Invalid format pattern: " + now); } - } else if (format.charAt(fp) != ' ') { - if (format.charAt(fp) != value.charAt(vp)) { - throw new InvalidFormatException("Invalid char: " + value.charAt(vp) + ", expected: " - + format.charAt(fp)); + } else if (format.charAt(pFormat) != ' ') { + if (format.charAt(pFormat) != value.charAt(pValue)) { + throw new InvalidFormatException("Invalid char: " + value.charAt(pValue) + ", expected: " + + 
format.charAt(pFormat)); } - fp++; - vp++; + pFormat++; + pValue++; } else { - fp++; + pFormat++; } } // continue to iterate pattern if has // to find out if it has time part. - while (fp < fend) { - f = format.charAt(fp); - if (f == '%' && fp + 1 < fend) { - fp++; - f = format.charAt(fp); - fp++; - switch (f) { + while (pFormat < endFormat) { + now = format.charAt(pFormat); + if (now == '%' && pFormat + 1 < endFormat) { + pFormat++; + now = format.charAt(pFormat); + pFormat++; + switch (now) { case 'H': case 'h': case 'I': @@ -1493,25 +1506,29 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr case 'S': case 'p': case 'T': - timePartUsed = true; + partUsed |= timePart; break; default: break; } } else { - fp++; + pFormat++; } } - if (usaTime) { + if (partUsed == 0) { + throw new InvalidFormatException("Nothing for legal Date: " + value); + } + + if (hourSystem12) { if (this.hour > 12 || this.hour < 1) { throw new InvalidFormatException("Invalid hour: " + hour); } - this.hour = (this.hour % 12) + dayPart; + this.hour = (this.hour % 12) + halfDay; } if (hasSubVal) { - return vp; + return pValue; } // Year day @@ -1540,21 +1557,21 @@ public int fromDateFormatStr(String format, String value, boolean hasSubVal) thr getDateFromDaynr(days); } - if (!timePartUsed && year > 0) { - if (format.equals("%Y")) { - month = day = 1; - } else if (format.equals("%Y-%m")) { + // complete default month/day + if ((partUsed & ~normalDatePart) == 0) { // only date here + if ((partUsed & dayPart) == 0) { day = 1; + if ((partUsed & monthPart) == 0) { + month = 1; + } } } // Compute timestamp type - // TODO(Gabriel): we still use old version datetime/date and change this to new version when - // we think it's stable enough - if (datePartUsed) { - if (microSecondPartUsed) { + if ((partUsed & datePart) != 0) { // Ymd part only + if ((partUsed & fracPart) != 0) { this.type = Type.DATETIMEV2_WITH_MAX_SCALAR; - } else if (timePartUsed) { + } else if ((partUsed & 
timePart) != 0) { this.type = ScalarType.getDefaultDateType(Type.DATETIME); } else { this.type = ScalarType.getDefaultDateType(Type.DATE); diff --git a/regression-test/data/correctness/test_str_to_date.out b/regression-test/data/correctness/test_str_to_date.out index 5aa1f75a2ccf9f..633b590ad9803b 100644 --- a/regression-test/data/correctness/test_str_to_date.out +++ b/regression-test/data/correctness/test_str_to_date.out @@ -13,15 +13,18 @@ -- !select4 -- 2020-12-03T11:45:14 --- !add_1 -- +-- !short_nereids_1 -- 2023-01-01 --- !add_2 -- +-- !short_nereids_2 -- 2023-12-01 --- !add_3 -- +-- !short_nereids_3 -- 2023-01-01 +-- !short_nereids_4 -- +2020-02-01 + -- !select5 -- 2019-12-01 yyyy-MM-dd 2019-12-01T00:00 20201203 yyyyMMdd 2020-12-03T00:00 @@ -36,12 +39,15 @@ -- !select8 -- 2020-12-03T11:45:14 --- !add_4 -- +-- !short_legacy_1 -- 2023-01-01 --- !add_5 -- +-- !short_legacy_2 -- 2023-12-01 --- !add_6 -- +-- !short_legacy_3 -- 2023-01-01 +-- !short_legacy_4 -- +2020-02-01 + diff --git a/regression-test/data/datatype_p0/date/test_invalid_date.out b/regression-test/data/datatype_p0/date/test_invalid_date.out deleted file mode 100644 index 80b3c3963d3f42..00000000000000 --- a/regression-test/data/datatype_p0/date/test_invalid_date.out +++ /dev/null @@ -1,7 +0,0 @@ --- This file is automatically generated. 
You should know what you did if you want to edit this --- !sql1 -- -\N - --- !sql2 -- -\N - diff --git a/regression-test/suites/correctness/test_str_to_date.groovy b/regression-test/suites/correctness/test_str_to_date.groovy index 43c80d4f4ba4e1..300f9cb92167e7 100644 --- a/regression-test/suites/correctness/test_str_to_date.groovy +++ b/regression-test/suites/correctness/test_str_to_date.groovy @@ -49,9 +49,10 @@ sql """ set enable_nereids_planner=true , enable_fallback_to_original_planner=f qt_select4 """ SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss'); """ - qt_add_1 " select STR_TO_DATE('2023', '%Y') " - qt_add_2 " select STR_TO_DATE('2023-12', '%Y-%m') " - qt_add_3 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_nereids_1 " select STR_TO_DATE('2023', '%Y') " + qt_short_nereids_2 " select STR_TO_DATE('2023-12', '%Y-%m') " + qt_short_nereids_3 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_nereids_4 " select STR_TO_DATE('2020%2', '%Y%%%m')" sql """ set enable_nereids_planner=false;""" @@ -67,7 +68,8 @@ sql """ set enable_nereids_planner=false;""" qt_select8 """ SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss'); """ - qt_add_4 " select STR_TO_DATE('2023', '%Y') " - qt_add_5 " select STR_TO_DATE('2023-12', '%Y-%m') " - qt_add_6 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_legacy_1 " select STR_TO_DATE('2023', '%Y') " + qt_short_legacy_2 " select STR_TO_DATE('2023-12', '%Y-%m') " + qt_short_legacy_3 " select STR_TO_DATE('2023-12', '%Y')" + qt_short_legacy_4 " select STR_TO_DATE('2020%2', '%Y%%%m')" } diff --git a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy b/regression-test/suites/datatype_p0/date/test_invalid_date.groovy deleted file mode 100644 index 5a7af4703577db..00000000000000 --- a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy +++ /dev/null @@ -1,37 +0,0 @@ - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_invalid_date") { - def tbName = "test_invalid_date" - sql "DROP TABLE IF EXISTS ${tbName}" - sql """ - CREATE TABLE IF NOT EXISTS ${tbName} ( - c0 int, - c1 char(10), - c2 datev1, - c3 datev2 - ) - UNIQUE KEY(c0) - DISTRIBUTED BY HASH(c0) BUCKETS 5 properties("replication_num" = "1"); - """ - sql "insert into ${tbName} values(1, 'test1', '2000-01-01', '2000-01-01')" - - qt_sql1 "select str_to_date('202301', '%Y%m');" - qt_sql2 "select str_to_date('202301', '%Y%m') from ${tbName}" - sql "DROP TABLE ${tbName}" -}