Skip to content

Commit

Permalink
[fix](function) fix last_value get wrong result when have order by cl…
Browse files Browse the repository at this point in the history
…ause (apache#9247)
  • Loading branch information
zhangstar333 authored and weizhengte committed May 16, 2022
1 parent 71f2279 commit d78ba9c
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 93 deletions.
21 changes: 11 additions & 10 deletions be/src/vec/aggregate_functions/aggregate_function_window.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,12 @@ struct CopiedValue : public Value {
std::string _copied_value;
};

template <typename T, bool is_nullable, bool is_string, typename StoreType = Value>
template <typename T, bool result_is_nullable, bool is_string, typename StoreType = Value>
struct LeadAndLagData {
public:
bool has_init() const { return _is_init; }

static constexpr bool nullable = is_nullable;
static constexpr bool nullable = result_is_nullable;

void set_null_if_need() {
if (!_has_value) {
Expand All @@ -204,7 +204,7 @@ struct LeadAndLagData {
}

void insert_result_into(IColumn& to) const {
if constexpr (is_nullable) {
if constexpr (result_is_nullable) {
if (_data_value.is_null()) {
auto& col = assert_cast<ColumnNullable&>(to);
col.insert_default();
Expand All @@ -231,7 +231,7 @@ struct LeadAndLagData {
}

void set_value(const IColumn** columns, int64_t pos) {
if constexpr (is_nullable) {
if (is_column_nullable(*columns[0])) {
const auto* nullable_column = assert_cast<const ColumnNullable*>(columns[0]);
if (nullable_column && nullable_column->is_null_at(pos)) {
_data_value.set_null(true);
Expand Down Expand Up @@ -501,7 +501,7 @@ class WindowFunctionData final
};

template <template <typename> class AggregateFunctionTemplate, template <typename> class Data,
bool is_nullable, bool is_copy = false>
bool result_is_nullable, bool is_copy = false>
static IAggregateFunction* create_function_single_value(const String& name,
const DataTypes& argument_types,
const Array& parameters) {
Expand All @@ -515,23 +515,24 @@ static IAggregateFunction* create_function_single_value(const String& name,
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return new AggregateFunctionTemplate< \
Data<LeadAndLagData<TYPE, is_nullable, false, StoreType>>>(argument_types);
Data<LeadAndLagData<TYPE, result_is_nullable, false, StoreType>>>(argument_types);
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH

if (which.is_decimal()) {
return new AggregateFunctionTemplate<
Data<LeadAndLagData<Int128, is_nullable, false, StoreType>>>(argument_types);
Data<LeadAndLagData<Int128, result_is_nullable, false, StoreType>>>(argument_types);
}
if (which.is_date_or_datetime()) {
return new AggregateFunctionTemplate<
Data<LeadAndLagData<Int64, is_nullable, false, StoreType>>>(argument_types);
Data<LeadAndLagData<Int64, result_is_nullable, false, StoreType>>>(argument_types);
}
if (which.is_string_or_fixed_string()) {
return new AggregateFunctionTemplate<
Data<LeadAndLagData<StringRef, is_nullable, true, StoreType>>>(argument_types);
Data<LeadAndLagData<StringRef, result_is_nullable, true, StoreType>>>(
argument_types);
}
DCHECK(false) << "with unknowed type, failed in create_aggregate_function_leadlag";
DCHECK(false) << "with unknowed type, failed in create_aggregate_function_" << name;
return nullptr;
}

Expand Down
135 changes: 55 additions & 80 deletions be/test/vec/function/function_bitmap_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,157 +26,135 @@ TEST(function_bitmap_test, function_bitmap_min_test) {
std::string func_name = "bitmap_min";
InputTypeSet input_types = {TypeIndex::BitMap};

auto bitmap1 = new BitmapValue(1);
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({1, 9999999}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1}, (int64_t)1},
{{bitmap2}, (int64_t)1},
{{empty_bitmap}, Null()},
BitmapValue bitmap1(1);
BitmapValue bitmap2({1, 9999999});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1}, (int64_t)1},
{{&bitmap2}, (int64_t)1},
{{&empty_bitmap}, Null()},
{{Null()}, Null()}};

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete empty_bitmap;
}
TEST(function_bitmap_test, function_bitmap_max_test) {
std::string func_name = "bitmap_max";
InputTypeSet input_types = {TypeIndex::BitMap};

auto bitmap1 = new BitmapValue(1);
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({1, 9999999}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1}, (int64_t)1},
{{bitmap2}, (int64_t)9999999},
{{empty_bitmap}, Null()},
BitmapValue bitmap1(1);
BitmapValue bitmap2({1, 9999999});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1}, (int64_t)1},
{{&bitmap2}, (int64_t)9999999},
{{&empty_bitmap}, Null()},
{{Null()}, Null()}};

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete empty_bitmap;
}

TEST(function_bitmap_test, function_bitmap_to_string_test) {
std::string func_name = "bitmap_to_string";
InputTypeSet input_types = {TypeIndex::BitMap};

auto bitmap1 = new BitmapValue(1);
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({1, 9999999}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1}, std::string("1")},
{{bitmap2}, std::string("1,9999999")},
{{empty_bitmap}, std::string("")},
BitmapValue bitmap1(1);
BitmapValue bitmap2({1, 9999999});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1}, std::string("1")},
{{&bitmap2}, std::string("1,9999999")},
{{&empty_bitmap}, std::string("")},
{{Null()}, Null()}};

check_function<DataTypeString, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete empty_bitmap;
}

TEST(function_bitmap_test, function_bitmap_and_count) {
std::string func_name = "bitmap_and_count";
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
auto bitmap1 = new BitmapValue(std::vector<uint64_t>({1, 2, 3}));
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({3, 4, 5}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1, empty_bitmap}, (int64_t)0},
{{bitmap1, bitmap1}, (int64_t)3},
{{bitmap1, bitmap2}, (int64_t)1}};
BitmapValue bitmap1({1, 2, 3});
BitmapValue bitmap2({3, 4, 5});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)0},
{{&bitmap1, &bitmap1}, (int64_t)3},
{{&bitmap1, &bitmap2}, (int64_t)1}};

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete empty_bitmap;

{
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap};
BitmapValue bitmap1({33, 1, 2019});
BitmapValue bitmap2({0, 33, std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap3({33, 5, std::numeric_limits<uint64_t>::max()});
auto empty_bitmap = new BitmapValue(); //test empty
BitmapValue empty_bitmap; //test empty

DataSet data_set = {{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)0},
DataSet data_set = {{{&bitmap1, &bitmap2, &empty_bitmap}, (int64_t)0},
{{&bitmap1, &bitmap2, &bitmap3}, (int64_t)1}, //33
{{&bitmap1, &bitmap2, Null()}, Null()},
{{&bitmap1, &bitmap3, &bitmap3}, (int64_t)1}}; //33

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete empty_bitmap;
}
}

TEST(function_bitmap_test, function_bitmap_or_count) {
std::string func_name = "bitmap_or_count";
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
auto bitmap1 = new BitmapValue(std::vector<uint64_t>({1, 2, 3}));
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({1, 2, 3, 4}));
auto bitmap3 = new BitmapValue(std::vector<uint64_t>({2, 3}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1, empty_bitmap}, (int64_t)3},
{{bitmap2, bitmap3}, (int64_t)4},
{{bitmap1, bitmap3}, (int64_t)3}};

BitmapValue bitmap1({1, 2, 3});
BitmapValue bitmap2({1, 2, 3, 4});
BitmapValue bitmap3({2, 3});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)3},
{{&bitmap2, &bitmap3}, (int64_t)4},
{{&bitmap1, &bitmap3}, (int64_t)3}};

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete bitmap3;
delete empty_bitmap;

{
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap};
BitmapValue bitmap1({1024, 1, 2019});
BitmapValue bitmap2({0, 33, std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap3({33, 5, std::numeric_limits<uint64_t>::max()}); //18446744073709551615
auto empty_bitmap = new BitmapValue(); //test empty
BitmapValue empty_bitmap; //test empty

DataSet data_set = {{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019
DataSet data_set = {{{&bitmap1, &bitmap2, &empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019
{{&bitmap1, &bitmap2, &bitmap3},
(int64_t)7}, //0,1,5,33,1024,2019,18446744073709551615
{{&bitmap1, empty_bitmap, Null()}, Null()},
{{&bitmap1, &empty_bitmap, Null()}, Null()},
{{&bitmap1, &bitmap3, &bitmap3},
(int64_t)6}}; //1,5,33,1024,2019,18446744073709551615

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete empty_bitmap;
}
}

TEST(function_bitmap_test, function_bitmap_xor_count) {
std::string func_name = "bitmap_xor_count";
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
auto bitmap1 = new BitmapValue(std::vector<uint64_t>({1, 2, 3}));
auto bitmap2 = new BitmapValue(std::vector<uint64_t>({1, 2, 3, 4}));
auto bitmap3 = new BitmapValue(std::vector<uint64_t>({2, 3}));
auto bitmap4 = new BitmapValue(std::vector<uint64_t>({1, 2, 6}));
auto empty_bitmap = new BitmapValue();
DataSet data_set = {{{bitmap1, empty_bitmap}, (int64_t)3},
{{bitmap2, bitmap3}, (int64_t)2},
{{bitmap1, bitmap4}, (int64_t)2}};

BitmapValue bitmap1({1, 2, 3});
BitmapValue bitmap2({1, 2, 3, 4});
BitmapValue bitmap3({2, 3});
BitmapValue bitmap4({1, 2, 6});
BitmapValue empty_bitmap;
DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)3},
{{&bitmap2, &bitmap3}, (int64_t)2},
{{&bitmap1, &bitmap4}, (int64_t)2}};

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete bitmap1;
delete bitmap2;
delete bitmap3;
delete bitmap4;
delete empty_bitmap;

{
InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap, TypeIndex::BitMap};
BitmapValue bitmap1({1024, 1, 2019});
BitmapValue bitmap2({0, 33, std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap3({33, 5, std::numeric_limits<uint64_t>::max()});
auto empty_bitmap = new BitmapValue(); //test empty
BitmapValue empty_bitmap; //test empty

DataSet data_set = {
{{&bitmap1, &bitmap2, empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019
{{&bitmap1, &bitmap2, &empty_bitmap}, (int64_t)5}, //0,1,33,1024,2019
{{&bitmap1, &bitmap2, &bitmap3}, (int64_t)6}, //0,1,5,1024,2019,18446744073709551615
{{&bitmap1, empty_bitmap, Null()}, Null()},
{{&bitmap1, &empty_bitmap, Null()}, Null()},
{{&bitmap1, &bitmap3, &bitmap3}, (int64_t)3}}; //1,1024,2019

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete empty_bitmap;
}
}

Expand All @@ -186,15 +164,14 @@ TEST(function_bitmap_test, function_bitmap_and_not_count) {
BitmapValue bitmap1({1, 2, 3});
BitmapValue bitmap2({3, 4, std::numeric_limits<uint64_t>::min()});
BitmapValue bitmap3({33, 5, std::numeric_limits<uint64_t>::max()});
auto empty_bitmap = new BitmapValue();
BitmapValue empty_bitmap;

DataSet data_set = {{{&bitmap1, empty_bitmap}, (int64_t)3}, //1,2,3
DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)3}, //1,2,3
{{&bitmap2, Null()}, Null()},
{{&bitmap2, &bitmap3}, (int64_t)3}, //0,3,4
{{&bitmap1, &bitmap2}, (int64_t)2}}; //1,2

check_function<DataTypeInt64, true>(func_name, input_types, data_set);
delete empty_bitmap;
}
TEST(function_bitmap_test, function_bitmap_has_all) {
std::string func_name = "bitmap_has_all";
Expand All @@ -207,18 +184,16 @@ TEST(function_bitmap_test, function_bitmap_has_all) {
BitmapValue bitmap3 = BitmapValue({0, 1, 2});
BitmapValue bitmap4 = BitmapValue({0, 1, 2, std::numeric_limits<uint64_t>::max()});
BitmapValue bitmap5 = BitmapValue({0, 1, 2});
auto empty_bitmap1 = new BitmapValue();
auto empty_bitmap2 = new BitmapValue();
BitmapValue empty_bitmap1;
BitmapValue empty_bitmap2;

DataSet data_set = {{{&bitmap1, &bitmap2}, uint8(true)},
{{empty_bitmap1, empty_bitmap2}, uint8(true)},
{{&empty_bitmap1, &empty_bitmap2}, uint8(true)},
{{&bitmap3, &bitmap4}, uint8(false)},
{{&bitmap4, &bitmap5}, uint8(true)},
{{Null(), empty_bitmap1}, Null()}};
{{Null(), &empty_bitmap1}, Null()}};

check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
delete empty_bitmap1;
delete empty_bitmap2;
}

} // namespace doris::vectorized
Original file line number Diff line number Diff line change
Expand Up @@ -710,9 +710,8 @@ private void standardize(Analyzer analyzer) throws AnalysisException {
resetWindow = true;
}

// Change first_value/last_value RANGE windows to ROWS
if ((analyticFnName.getFunction().equalsIgnoreCase(FIRSTVALUE)
|| analyticFnName.getFunction().equalsIgnoreCase(LASTVALUE))
// Change first_value RANGE windows to ROWS
if ((analyticFnName.getFunction().equalsIgnoreCase(FIRSTVALUE))
&& window != null
&& window.getType() == AnalyticWindow.Type.RANGE) {
window = new AnalyticWindow(AnalyticWindow.Type.ROWS, window.getLeftBoundary(),
Expand Down
8 changes: 8 additions & 0 deletions regression-test/data/correctness/test_last_value_window.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
21 04-21-11 1 1
22 04-22-10-21 0 1
22 04-22-10-21 1 1
23 04-23-10 1 1
24 02-24-10-21 1 1

51 changes: 51 additions & 0 deletions regression-test/suites/correctness/test_last_value_window.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite: last_value() used as a window function whose OVER(...)
// clause has an ORDER BY, executed with the vectorized engine switched off.
suite("test_last_value_window") {
    def tableName = "state"

    // Recreate the table so every run starts from the same contents.
    sql """ DROP TABLE IF EXISTS ${tableName} """
    sql """
        CREATE TABLE ${tableName} (
            `myday` INT,
            `time` VARCHAR(40) NOT NULL,
            `state` INT
        ) ENGINE=OLAP
        DUPLICATE KEY(`myday`,time,state)
        COMMENT "OLAP"
        DISTRIBUTED BY HASH(`myday`) BUCKETS 2
        PROPERTIES (
            "replication_num" = "1",
            "in_memory" = "false",
            "storage_format" = "V2"
        );
    """

    // myday = 22 has two rows with the same `time` value, so they are peers
    // with respect to the window's ORDER BY.
    sql """ INSERT INTO ${tableName} VALUES
        (21,"04-21-11",1),
        (22,"04-22-10-21",0),
        (22,"04-22-10-21",1),
        (23,"04-23-10",1),
        (24,"02-24-10-21",1); """

    // not_vectorized
    sql """ set enable_vectorized_engine = false; """

    // The outer ORDER BY fixes the order of the returned rows. Without it the
    // row order of a query over a table hashed into 2 buckets is unspecified,
    // which would make a line-by-line comparison with recorded output flaky.
    qt_select_default """ select *,last_value(state) over(partition by myday order by time) from ${tableName} order by myday, time, state; """

}

0 comments on commit d78ba9c

Please sign in to comment.