From c43ad6f6c8ac7c45f939893fcabfe1aff26313cd Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Tue, 3 Jun 2025 20:08:23 +0800 Subject: [PATCH 1/3] 1 --- be/src/vec/runtime/vdatetime_value.cpp | 50 +++++++------------ .../datatype_p0/datetimev2/test_timezone.out | 16 +++--- .../datetimev2/test_timezone.groovy | 5 ++ 3 files changed, 30 insertions(+), 41 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 9e27c163876e8c..f1fd0bc722be80 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2020,7 +2020,7 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale int field_idx = 0; int field_len = year_len; - long sec_offset = 0; + int sec_offset = 0; bool need_use_timezone = false; while (ptr < end && isdigit(*ptr) && field_idx < MAX_DATE_PARTS) { @@ -2185,40 +2185,28 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale if (!TimezoneUtils::find_cctz_time_zone(std::string {ptr, end}, given_tz)) { return false; // invalid format } - auto given = cctz::convert(cctz::civil_second {}, given_tz); - auto local = cctz::convert(cctz::civil_second {}, *local_time_zone); - // these two values is absolute time. so they are negative. need to use (-local) - (-given) - sec_offset = std::chrono::duration_cast(given - local).count(); - } - - // In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make - // rounding to date part, it would be lost. To avoid this, we use a Datetime type to do these calc. It will save the - // time part and apply the offset. Then convert to Date type back. - // see https://github.com/apache/doris/pull/33553 for more details. 
- if constexpr (!is_datetime) { - if (sec_offset) { - DateV2Value tmp; - if (!tmp.check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], - date_val[4], date_val[5], date_val[6])) { - return false; - } - if (!tmp.date_add_interval( - TimeInterval {TimeUnit::SECOND, sec_offset, false})) { - return false; - } - this->assign_from(tmp); - return true; + if (is_invalid(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], date_val[5], + date_val[6])) { + return false; } - } + cctz::civil_second cs {date_val[0], date_val[1], date_val[2], + date_val[3], date_val[4], date_val[5]}; - if (!check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], - date_val[5], date_val[6])) { - return false; + auto given = cctz::convert(cs, given_tz); + auto local = cctz::convert(given, *local_time_zone); + date_val[0] = local.year(); + date_val[1] = local.month(); + date_val[2] = local.day(); + date_val[3] = local.hour(); + date_val[4] = local.minute(); + date_val[5] = local.second(); } - return sec_offset ? date_add_interval( - TimeInterval {TimeUnit::SECOND, sec_offset, false}) - : true; + return check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], + date_val[5], date_val[6]) && + (sec_offset ? date_add_interval( + TimeInterval {TimeUnit::SECOND, sec_offset, false}) + : true); } template diff --git a/regression-test/data/datatype_p0/datetimev2/test_timezone.out b/regression-test/data/datatype_p0/datetimev2/test_timezone.out index 1fae14def399b1..6b1b3ebb41de81 100644 --- a/regression-test/data/datatype_p0/datetimev2/test_timezone.out +++ b/regression-test/data/datatype_p0/datetimev2/test_timezone.out @@ -1,14 +1,4 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this --- !legacy -- -2022-01-01T01:02:55 2022-01-01 -2022-02-01T03:02:55 2022-02-01 -2022-02-28T19:02:55 2022-03-01 -2022-04-01T09:02:55 2022-03-31 -2022-05-01T00:32:55 2022-05-01 -2022-05-31T22:32:55 2022-06-01 -2022-06-30T20:02:55 2022-07-01 -2022-07-31T21:00 2022-08-01 - -- !nereids -- 2022-01-01T01:02:55 2022-01-01 2022-02-01T03:02:55 2022-02-01 @@ -25,3 +15,9 @@ -- !fold3 -- 2020-12-12T13:12:12 +-- !nodst -- +2010-01-05T10:15:30 + +-- !dst -- +2010-08-05T09:15:30 + diff --git a/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy b/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy index 746a78de4417b3..5d3d7c272ca8ec 100644 --- a/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy +++ b/regression-test/suites/datatype_p0/datetimev2/test_timezone.groovy @@ -43,4 +43,9 @@ suite("test_timezone") { qt_fold1 """ select cast('2020-12-12T12:12:12asia/shanghai' as datetime); """ qt_fold2 """ select cast('2020-12-12T12:12:12america/los_angeLES' as datetime); """ qt_fold3 """ select cast('2020-12-12T12:12:12Europe/pARIS' as datetime); """ + + qt_nodst "select cast('2010-01-05 08:15:30Europe/London' as datetime);" + qt_dst "select cast('2010-08-05 08:15:30Europe/London' as datetime);" + testFoldConst ("select cast('2010-01-05 08:15:30Europe/London' as datetime);") + testFoldConst ("select cast('2010-08-05 08:15:30Europe/London' as datetime);") } From 783ea731852f3c3bb10b6579e442574bcc600915 Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Mon, 23 Jun 2025 20:01:47 +0800 Subject: [PATCH 2/3] fix --- .../segment_v2/inverted_index/util/priority_queue.h | 2 +- be/src/vec/runtime/vdatetime_value.cpp | 1 + be/test/vec/function/cast/cast_to_date_test.cpp | 8 +++++--- be/test/vec/function/cast/cast_to_datetime_test.cpp | 3 ++- be/test/vec/function/function_test_util.h | 11 +++-------- regression-test/data/cast_p0/cast_to_datetime.out | 8 ++++---- 
.../datatype_p0/datetimev2/test_tz_streamload.out | 2 +- 7 files changed, 17 insertions(+), 18 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index/util/priority_queue.h b/be/src/olap/rowset/segment_v2/inverted_index/util/priority_queue.h index f7d54323355433..24cd5298f628ed 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/util/priority_queue.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/util/priority_queue.h @@ -28,7 +28,7 @@ template class PriorityQueue { public: PriorityQueue(size_t max_size, std::function sentinel_object_supplier = nullptr) { - assert(max_size >= 0 && max_size < std::numeric_limits::max()); + assert(max_size < std::numeric_limits::max()); size_t heap_size = (max_size == 0) ? 2 : max_size + 1; _heap.resize(heap_size); _max_size = max_size; diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index f1fd0bc722be80..9f98fdb016fc7c 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2189,6 +2189,7 @@ bool DateV2Value::from_date_str_base(const char* date_str, int len, int scale date_val[6])) { return false; } + // cctz::civil_second carries out-of-range fields over: e.g. if day is 70, it carries into the month. cctz::civil_second cs {date_val[0], date_val[1], date_val[2], date_val[3], date_val[4], date_val[5]}; diff --git a/be/test/vec/function/cast/cast_to_date_test.cpp b/be/test/vec/function/cast/cast_to_date_test.cpp index 5cc42d391eedba..d79dc96e3d9481 100644 --- a/be/test/vec/function/cast/cast_to_date_test.cpp +++ b/be/test/vec/function/cast/cast_to_date_test.cpp @@ -27,9 +27,11 @@ TEST_F(FunctionCastTest, strict_test_from_string_to_date) { // Valid ISO 8601 format with timezone {{std::string("2023-07-16T19:20:30.123+08:00")}, std::string("2023-07-16")}, {{std::string("2023-07-16T19+08:00")}, std::string("2023-07-16")}, - {{std::string("2023-07-16T1920+08:00")}, std::string("2023-07-16")}, + // keep its original behaviour.
The timezone offset and the time part can together affect the date part. + // Here 1920 is treated as 1,920 hours and carried over into the day. + {{std::string("2023-07-16T1920+08:00")}, std::string("2023-10-04")}, {{std::string("70-1-1T00:00:00-0000")}, std::string("1970-01-01")}, - {{std::string("19991231T235960.5UTC")}, Null()}, + {{std::string("19991231T235960.5UTC")}, std::string("2000-01-01")}, // Date with timezone names {{std::string("2024-02-29 12:00:00 Europe/Paris")}, std::string("2024-02-29")}, @@ -101,7 +103,7 @@ TEST_F(FunctionCastTest, non_strict_test_from_string_to_date) { {{std::string("0023-1-1T1:2:3. -00:00")}, std::string("0023-01-01")}, {{std::string("2025/06/15T00:00:00.0-0")}, std::string("2025-06-15")}, {{std::string("2025/06/15T00:00:00.99999999999")}, std::string("2025-06-15")}, - {{std::string("2024-02-29T23-59-60ZULU")}, Null()}, + {{std::string("2024-02-29T23-59-60ZULU")}, std::string("2024-03-01")}, {{std::string("2024 12 31T121212.123456 America/New_York")}, Null()}, // Invalid formats diff --git a/be/test/vec/function/cast/cast_to_datetime_test.cpp b/be/test/vec/function/cast/cast_to_datetime_test.cpp index f9f3df4420c6bd..73cb7b7abd91a2 100644 --- a/be/test/vec/function/cast/cast_to_datetime_test.cpp +++ b/be/test/vec/function/cast/cast_to_datetime_test.cpp @@ -56,7 +56,8 @@ TEST_F(FunctionCastTest, strict_test_from_string_to_datetime) { // Various timezone offsets {{std::string("2020-12-12 13:12:12-03:00")}, std::string("2020-12-13 00:12:12")}, - {{std::string("0023-01-01T00:00Z")}, std::string("0023-01-01 08:00:00")}, + // The Asia/Shanghai offset before 1900 is not +08:00:00 but +08:05:43 + {{std::string("0023-01-01T00:00Z")}, std::string("0023-01-01 08:05:43")}, // Year cutoff cases {{std::string("69-12-31")}, std::string("2069-12-31")}, diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 230668deb1b049..263080761c26b7 100644 --- a/be/test/vec/function/function_test_util.h +++
b/be/test/vec/function/function_test_util.h @@ -418,15 +418,10 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty << ", expected result: " << result_type_ptr->to_string(*expected_col_ptr, i); } else { auto comp_res = column->compare_at(i, i, *expected_col_ptr, 1); - if (0 != comp_res) { - std::cerr << "function " << func_name << " result mismatch, row " << i << ":\n" - << block.dump_data(i, 1) << std::endl - << ", expected result: " - << result_type_ptr->to_string(*expected_col_ptr, i) << std::endl; - } EXPECT_EQ(0, comp_res) - << ", function " << func_name - << " result: " << block.get_data_types()[result]->to_string(*column, i) + << ", function " << func_name << ". input row:\n" + << block.dump_data(i, 1) + << "result: " << block.get_data_types()[result]->to_string(*column, i) << ", expected result: " << result_type_ptr->to_string(*expected_col_ptr, i); } } diff --git a/regression-test/data/cast_p0/cast_to_datetime.out b/regression-test/data/cast_p0/cast_to_datetime.out index 272752b2ee9f24..66e9b31ee6f7be 100644 --- a/regression-test/data/cast_p0/cast_to_datetime.out +++ b/regression-test/data/cast_p0/cast_to_datetime.out @@ -63,7 +63,7 @@ 2020-12-13T00:12:12 -- !sql -- -0023-01-01T08:00 +0023-01-01T08:05:43 -- !sql -- 2069-12-31T00:00 @@ -243,7 +243,7 @@ 2020-12-13T00:12:12 -- !sql -- -0023-01-01T08:00 +0023-01-01T08:05:43 -- !sql -- 2069-12-31T00:00 @@ -372,7 +372,7 @@ 1970-01-01T00:00 -- !sql -- -2000-01-01T07:59:59 +2000-01-01T08:00 -- !sql -- 2024-05-01T00:00 @@ -423,7 +423,7 @@ 2020-12-13T00:12:12 -- !sql -- -0023-01-01T08:00 +0023-01-01T08:05:43 -- !sql -- 2069-12-31T00:00 diff --git a/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out b/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out index ab103c3a306f46..a05ac54d3056dd 100644 --- a/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out +++ b/regression-test/data/datatype_p0/datetimev2/test_tz_streamload.out @@ -15,7 +15,7 @@ 
3 2023-08-17T17:41:18 4 2023-08-17T14:41:18 5 2023-08-17T09:41:18 -6 2023-08-18T01:41:18 +6 2023-08-18T00:41:18 7 2023-08-17T17:41:18 8 2023-08-17T19:41:18 From de0ee4d1d4aa6f874f5138a043e694cf05c74f3b Mon Sep 17 00:00:00 2001 From: zhaochangle Date: Tue, 24 Jun 2025 06:31:13 +0800 Subject: [PATCH 3/3] fix_compile_gcc --- be/src/vec/runtime/vdatetime_value.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 8565e77888fed7..0e31dbcfd28acd 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -1065,8 +1065,8 @@ class DateV2Value { } bool operator==(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 == ts2; @@ -1081,8 +1081,8 @@ class DateV2Value { bool operator<=(const DateV2Value& other) const { return !(*this > other); } bool operator<=(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 <= ts2; @@ -1091,8 +1091,8 @@ class DateV2Value { bool operator>=(const DateV2Value& other) const { return !(*this < other); } bool operator>=(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 >= ts2; @@ -1103,8 +1103,8 @@ class DateV2Value { } bool operator<(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, 
TimezoneUtils::default_time_zone); return ts1 < ts2; @@ -1115,8 +1115,8 @@ class DateV2Value { } bool operator>(const VecDateTimeValue& other) const { - int64_t ts1; - int64_t ts2; + int64_t ts1 = 0; + int64_t ts2 = 0; this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); return ts1 > ts2;