Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test aggregation functions #5364

Merged
merged 23 commits into from
Jul 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 310 additions & 0 deletions dbms/src/Flash/tests/gtest_aggregation_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/ExecutorTestUtils.h>
#include <TestUtils/mockExecutor.h>

namespace DB
{
namespace tests
{

/// Shorthand for the decimal field type used when spelling out decimal test values,
/// e.g. `DT(55, 1)`.
#define DT DecimalField<Decimal32>
/// Expands to a braced list of two column expressions, built with col() from the
/// entries `a` and `b` of `types_col_name`. Used as one element of a group-by list.
#define COL_GROUP2(a, b) \
{ \
col(types_col_name[a]), col(types_col_name[b]) \
}
/// Expands to a braced list of the two column names at indexes `a` and `b` of
/// `types_col_name`. Used as one element of a projection list.
#define COL_PROJ2(a, b) \
{ \
types_col_name[a], types_col_name[b] \
}

class ExecutorAggTestRunner : public DB::tests::ExecutorTest
{
public:
using ColStringNullableType = std::optional<typename TypeTraits<String>::FieldType>;
using ColInt8NullableType = std::optional<typename TypeTraits<Int8>::FieldType>;
using ColInt16NullableType = std::optional<typename TypeTraits<Int16>::FieldType>;
using ColInt32NullableType = std::optional<typename TypeTraits<Int32>::FieldType>;
using ColInt64NullableType = std::optional<typename TypeTraits<Int64>::FieldType>;
using ColFloat32NullableType = std::optional<typename TypeTraits<Float32>::FieldType>;
using ColFloat64NullableType = std::optional<typename TypeTraits<Float64>::FieldType>;
using ColMyDateNullableType = std::optional<typename TypeTraits<MyDate>::FieldType>;
using ColMyDateTimeNullableType = std::optional<typename TypeTraits<MyDateTime>::FieldType>;
using ColDecimalNullableType = std::optional<typename TypeTraits<Decimal32>::FieldType>;
using ColUInt64Type = typename TypeTraits<UInt64>::FieldType;
ywqzzy marked this conversation as resolved.
Show resolved Hide resolved

/// Whole-column containers: one std::vector of the matching cell type per tested data type.
/// These are the types of the fixture's data members and of the expected-result literals
/// handed to toNullableVec / toVec.
using ColumnWithNullableString = std::vector<ColStringNullableType>;
using ColumnWithNullableInt8 = std::vector<ColInt8NullableType>;
using ColumnWithNullableInt16 = std::vector<ColInt16NullableType>;
using ColumnWithNullableInt32 = std::vector<ColInt32NullableType>;
using ColumnWithNullableInt64 = std::vector<ColInt64NullableType>;
using ColumnWithNullableFloat32 = std::vector<ColFloat32NullableType>;
using ColumnWithNullableFloat64 = std::vector<ColFloat64NullableType>;
using ColumnWithNullableMyDate = std::vector<ColMyDateNullableType>;
using ColumnWithNullableMyDateTime = std::vector<ColMyDateTimeNullableType>;
using ColumnWithNullableDecimal = std::vector<ColDecimalNullableType>;
/// Non-nullable UInt64 column.
using ColumnWithUInt64 = std::vector<ColUInt64Type>;

/// Prepares the test context: runs the base-class initialization first, then registers the
/// two mock tables this fixture queries (`{db_name, table_types}` and `{db_name, table_name}`).
void initializeContext() override
{
ExecutorTest::initializeContext();

/// Create table for tests of group by
/// Eleven columns, one per tested data type. The i-th column info and the i-th data column
/// are both named types_col_name[i], so schema and data line up by position.
context.addMockTable(/* name= */ {db_name, table_types},
/* columnInfos= */
{{types_col_name[0], TiDB::TP::TypeLong},
{types_col_name[1], TiDB::TP::TypeDecimal},
{types_col_name[2], TiDB::TP::TypeTiny},
{types_col_name[3], TiDB::TP::TypeShort},
{types_col_name[4], TiDB::TP::TypeLong},
{types_col_name[5], TiDB::TP::TypeLongLong},
{types_col_name[6], TiDB::TP::TypeFloat},
{types_col_name[7], TiDB::TP::TypeDouble},
{types_col_name[8], TiDB::TP::TypeDate},
{types_col_name[9], TiDB::TP::TypeDatetime},
{types_col_name[10], TiDB::TP::TypeString}},
/* columns= */
{toNullableVec<Int32>(types_col_name[0], col_id),
toNullableVec<Decimal32>(types_col_name[1], col_decimal),
toNullableVec<Int8>(types_col_name[2], col_tinyint),
toNullableVec<Int16>(types_col_name[3], col_smallint),
toNullableVec<Int32>(types_col_name[4], col_int),
toNullableVec<Int64>(types_col_name[5], col_bigint),
toNullableVec<Float32>(types_col_name[6], col_float),
toNullableVec<Float64>(types_col_name[7], col_double),
toNullableVec<MyDate>(types_col_name[8], col_mydate),
toNullableVec<MyDateTime>(types_col_name[9], col_mydatetime),
toNullableVec<String>(types_col_name[10], col_string)});

/// Create table for tests of aggregation functions
/// Four columns (age, gender, country, salary), again matched by position via col_name[i].
context.addMockTable(/* name= */ {db_name, table_name},
/* columnInfos= */
{{col_name[0], TiDB::TP::TypeLong},
{col_name[1], TiDB::TP::TypeString},
{col_name[2], TiDB::TP::TypeString},
{col_name[3], TiDB::TP::TypeDouble}},
/* columns= */
{toNullableVec<Int32>(col_name[0], col_age),
toNullableVec<String>(col_name[1], col_gender),
toNullableVec<String>(col_name[2], col_country),
toNullableVec<Float64>(col_name[3], col_salary)});
}

std::shared_ptr<tipb::DAGRequest> buildDAGRequest(std::pair<String, String> src, MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj)
{
/// We can filter the group by column with project operator.
/// project is applied to get partial aggregation output, so that we can remove redundant outputs and compare results with less handwriting codes.
return context.scan(src.first, src.second).aggregation(agg_funcs, group_by_exprs).project(proj).build(context);
}

/// Executes `request` once per tested concurrency level and checks every result against
/// `expect_columns` with ASSERT_COLUMNS_EQ_UR (presumably a row-order-insensitive
/// comparison -- TODO confirm against the macro definition).
///
/// The concurrency passed to executeStreams() starts at 1 and grows by `step` while it does
/// not exceed `max_concurrency`.
void executeWithConcurrency(const std::shared_ptr<tipb::DAGRequest> & request, const ColumnsWithTypeAndName & expect_columns)
{
    size_t concurrency = 1;
    while (concurrency <= max_concurrency)
    {
        ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, concurrency));
        concurrency += step;
    }
}

/// Largest concurrency value (inclusive bound) tried by executeWithConcurrency().
/// constexpr rather than plain const: a constexpr static data member is implicitly inline
/// (C++17), so it stays usable even where it is ODR-used, e.g. bound to a const reference.
static constexpr size_t max_concurrency = 10;
/// Increment between two consecutive concurrency values tried by executeWithConcurrency().
static constexpr size_t step = 2;

/// Database name under which both mock tables are registered.
const String db_name{"test_db"};

/// Prepare some data and names for tests of group by
/// Every data column below holds 9 values, one per row; `{}` is an empty optional, i.e. a
/// missing value. Values repeat on purpose so that grouping has something to merge.
const String table_types{"types"};
/// Column names of the `types` table, indexed the same way in initializeContext() and the tests.
const std::vector<String> types_col_name{"id", "decimal_", "tinyint_", "smallint_", "int_", "bigint_", "float_", "double_", "date_", "datetime_", "string_"};
ColumnWithNullableInt32 col_id{1, 2, 3, 4, 5, 6, 7, 8, 9};
/// DT(a, b) constructs a DecimalField<Decimal32>; presumably (unscaled value, scale) -- TODO confirm.
ColumnWithNullableDecimal col_decimal{DT(55, 1), {}, DT(-24, 1), DT(40, 1), DT(-40, 1), DT(40, 1), {}, DT(55, 1), DT(0, 1)};
ColumnWithNullableInt8 col_tinyint{1, 2, 3, {}, {}, 0, 0, -1, -2};
ColumnWithNullableInt16 col_smallint{2, 3, {}, {}, 0, -1, -2, 4, 0};
ColumnWithNullableInt32 col_int{4, {}, {}, 0, 123, -1, -1, 123, 4};
ColumnWithNullableInt64 col_bigint{2, 2, {}, 0, -1, {}, -1, 0, 123};
ColumnWithNullableFloat32 col_float{3.3, {}, 0, 4.0, 3.3, 5.6, -0.1, -0.1, {}};
ColumnWithNullableFloat64 col_double{0.1, 0, 1.1, 1.1, 1.2, {}, {}, -1.2, -1.2};
ColumnWithNullableMyDate col_mydate{1000000, 2000000, {}, 300000, 1000000, {}, 0, 2000000, {}};
ColumnWithNullableMyDateTime col_mydatetime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000, 1000000};
/// The strings differ only by letter case in several rows ("pingcap", "PingCAP", "PINGCAP").
ColumnWithNullableString col_string{{}, "pingcap", "PingCAP", {}, "PINGCAP", "PingCAP", {}, "Shanghai", "Shanghai"};

/// Prepare some data and names for aggregation functions
/// The `clerk` table also has 9 rows; `{}` again marks a missing value.
const String table_name{"clerk"};
/// Column names of the `clerk` table, indexed the same way in initializeContext() and the tests.
const std::vector<String> col_name{"age", "gender", "country", "salary"};
ColumnWithNullableInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34};
ColumnWithNullableString col_gender{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in one line?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in one line?

It's clang-tidy who forces me to write like this.

"male",
"female",
"female",
"male",
"female",
"female",
"male",
"female",
"male",
};
/// Data of the `country` column of the `clerk` table (registered as col_name[2]); the
/// aggregation tests use this column as a group-by key.
ColumnWithNullableString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"};
/// Data of the `salary` column of the `clerk` table (registered as col_name[3]); `{}` is an
/// empty optional, i.e. a missing value.
ColumnWithNullableFloat64 col_salary{1000.1, 1300.2, 0.3, {}, -200.4, 900.5, -999.6, 2000.7, -300.8};
};

/// Guarantee the correctness of group by
/// Guarantee the correctness of group by.
/// No aggregate function is requested, so each query returns the distinct key combinations
/// of the `types` table: first for every single column, then for several pairs of columns.
TEST_F(ExecutorAggTestRunner, GroupBy)
try
{
    /// Runs one group-by-only request per case against the `types` table and compares the
    /// result with the expected columns of that case.
    const auto run_cases = [this](const std::vector<MockAstVec> & keys,
                                  const std::vector<MockColumnNameVec> & outputs,
                                  const std::vector<ColumnsWithTypeAndName> & expected) {
        for (size_t case_idx = 0; case_idx < expected.size(); ++case_idx)
        {
            const auto dag_request = buildDAGRequest(std::make_pair(db_name, table_types), {}, keys[case_idx], outputs[case_idx]);
            executeWithConcurrency(dag_request, expected[case_idx]);
        }
    };

    {
        /// group by single column: one case per column, from tinyint_ (index 2) to string_ (index 10)
        constexpr size_t first_col = 2;
        constexpr size_t last_col = 10;
        std::vector<MockAstVec> keys;
        std::vector<MockColumnNameVec> outputs;
        for (size_t col_idx = first_col; col_idx <= last_col; ++col_idx)
        {
            keys.push_back(MockAstVec{col(types_col_name[col_idx])});
            outputs.push_back(MockColumnNameVec{types_col_name[col_idx]});
        }

        const std::vector<ColumnsWithTypeAndName> expected{
            /// select tinyint_ from test_db.types group by tinyint_;
            {toNullableVec<Int8>(types_col_name[2], ColumnWithNullableInt8{-1, 2, {}, 0, 1, 3, -2})},
            /// select smallint_ from test_db.types group by smallint_;
            {toNullableVec<Int16>(types_col_name[3], ColumnWithNullableInt16{-1, 2, -2, {}, 0, 4, 3})},
            /// select int_ from test_db.types group by int_;
            {toNullableVec<Int32>(types_col_name[4], ColumnWithNullableInt32{-1, {}, 4, 0, 123})},
            /// select bigint_ from test_db.types group by bigint_;
            {toNullableVec<Int64>(types_col_name[5], ColumnWithNullableInt64{2, -1, 0, 123, {}})},
            /// select float_ from test_db.types group by float_;
            {toNullableVec<Float32>(types_col_name[6], ColumnWithNullableFloat32{0, 4, 3.3, {}, 5.6, -0.1})},
            /// select double_ from test_db.types group by double_;
            {toNullableVec<Float64>(types_col_name[7], ColumnWithNullableFloat64{0, {}, -1.2, 1.1, 1.2, 0.1})},
            /// select date_ from test_db.types group by date_;
            {toNullableVec<MyDate>(types_col_name[8], ColumnWithNullableMyDate{{}, 0, 300000, 1000000, 2000000})},
            /// select datetime_ from test_db.types group by datetime_;
            {toNullableVec<MyDateTime>(types_col_name[9], ColumnWithNullableMyDateTime{{}, 0, 1000000, 2000000, 3000000})},
            /// select string_ from test_db.types group by string_;
            {toNullableVec<String>(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", "PINGCAP", "Shanghai"})}};

        /// Every list must describe the same number of cases before any of them is indexed.
        ASSERT_EQ(keys.size(), expected.size());
        ASSERT_EQ(outputs.size(), expected.size());
        run_cases(keys, outputs, expected);
    }

    {
        /// group by two columns
        const std::vector<MockAstVec> keys{COL_GROUP2(2, 6), COL_GROUP2(3, 9), COL_GROUP2(4, 7), COL_GROUP2(5, 10), COL_GROUP2(8, 9), COL_GROUP2(9, 10)};
        const std::vector<MockColumnNameVec> outputs{COL_PROJ2(2, 6), COL_PROJ2(3, 9), COL_PROJ2(4, 7), COL_PROJ2(5, 10), COL_PROJ2(8, 9), COL_PROJ2(9, 10)};
        const std::vector<ColumnsWithTypeAndName> expected{
            /// select tinyint_, float_ from test_db.types group by tinyint_, float_;
            {toNullableVec<Int8>(types_col_name[2], ColumnWithNullableInt8{1, 2, {}, 3, 0, 0, -1, {}, -2}),
             toNullableVec<Float32>(types_col_name[6], ColumnWithNullableFloat32{3.3, {}, 4, 0, -0.1, 5.6, -0.1, 3.3, {}})},
            /// select smallint_, datetime_ from test_db.types group by smallint_, datetime_;
            {toNullableVec<Int16>(types_col_name[3], ColumnWithNullableInt16{2, 3, {}, {}, 0, -1, -2, 4}),
             toNullableVec<MyDateTime>(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000})},
            /// select int_, double_ from test_db.types group by int_, double_;
            {toNullableVec<Int32>(types_col_name[4], ColumnWithNullableInt32{{}, 123, -1, 0, {}, 4, 4, 123}),
             toNullableVec<Float64>(types_col_name[7], ColumnWithNullableFloat64{0, -1.2, {}, 1.1, 1.1, -1.2, 0.1, 1.2})},
            /// select bigint_, string_ from test_db.types group by bigint_, string_;
            {toNullableVec<Int64>(types_col_name[5], ColumnWithNullableInt64{-1, 0, 0, 123, 2, {}, -1, 2}),
             toNullableVec<String>(types_col_name[10], ColumnWithNullableString{{}, {}, "Shanghai", "Shanghai", {}, "PingCAP", "PINGCAP", "pingcap"})},
            /// select date_, datetime_ from test_db.types group by date_, datetime_;
            {toNullableVec<MyDate>(types_col_name[8], ColumnWithNullableMyDate{1000000, 2000000, {}, 300000, 1000000, 0, 2000000, {}}),
             toNullableVec<MyDateTime>(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000})},
            /// select datetime_, string_ from test_db.types group by datetime_, string_;
            {toNullableVec<MyDateTime>(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000}),
             toNullableVec<String>(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", {}, "PINGCAP", {}, "Shanghai", "Shanghai"})}};

        ASSERT_EQ(keys.size(), expected.size());
        ASSERT_EQ(outputs.size(), expected.size());
        run_cases(keys, outputs, expected);
    }

    /// TODO type: decimal, enum and unsigned numbers
}
CATCH

TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add comment like TODO: support more type of min, max, count

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add comment like TODO: support more type of min, max, count

okk

try
{
    /// Checks max() and min() over the `clerk` table, once grouped by `country` and once
    /// grouped by (`country`, `gender`). Only the aggregate column is projected, so the
    /// expected data lists one value per group and no group keys.
    ///
    /// The case lists (aggregate functions, group-by keys, projections, expected columns)
    /// are indexed in lockstep; their sizes are asserted before each loop so that a case
    /// added to only one list fails the test instead of indexing out of bounds.
    std::shared_ptr<tipb::DAGRequest> request;
    auto agg_func0 = Max(col(col_name[0])); /// select max(age) from clerk group by country;
    auto agg_func1 = Max(col(col_name[3])); /// select max(salary) from clerk group by country, gender;

    auto group_by_expr0 = col(col_name[2]);
    auto group_by_expr10 = col(col_name[2]);
    auto group_by_expr11 = col(col_name[1]);

    /// Prepare some data for max function test
    std::vector<ColumnsWithTypeAndName> expect_cols{
        {toNullableVec<Int32>("max(age)", ColumnWithNullableInt32{36, 32, 30, {}})},
        {toNullableVec<Float64>("max(salary)", ColumnWithNullableFloat64{2000.7, 1300.2, 1000.1, 0.3, -300.8, {}})}};
    std::vector<MockAstVec> group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}};
    std::vector<MockColumnNameVec> projections{{"max(age)"}, {"max(salary)"}};
    std::vector<MockAstVec> agg_funcs{{agg_func0}, {agg_func1}};
    size_t test_num = expect_cols.size();
    ASSERT_EQ(agg_funcs.size(), test_num);
    ASSERT_EQ(group_by_exprs.size(), test_num);
    ASSERT_EQ(projections.size(), test_num);

    /// Start to test max function
    for (size_t i = 0; i < test_num; ++i)
    {
        request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]);
        executeWithConcurrency(request, expect_cols[i]);
    }

    /// Min function tests (the group-by keys are reused from the max cases)

    agg_func0 = Min(col(col_name[0])); /// select min(age) from clerk group by country;
    agg_func1 = Min(col(col_name[3])); /// select min(salary) from clerk group by country, gender;

    expect_cols = {
        {toNullableVec<Int32>("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})},
        {toNullableVec<Float64>("min(salary)", ColumnWithNullableFloat64{1300.2, 1000.1, 900.5, -200.4, -999.6, {}})}};
    projections = {{"min(age)"}, {"min(salary)"}};
    agg_funcs = {{agg_func0}, {agg_func1}};
    test_num = expect_cols.size();
    ASSERT_EQ(agg_funcs.size(), test_num);
    ASSERT_EQ(group_by_exprs.size(), test_num);
    ASSERT_EQ(projections.size(), test_num);

    /// Start to test min function
    for (size_t i = 0; i < test_num; ++i)
    {
        request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]);
        executeWithConcurrency(request, expect_cols[i]);
    }
}
CATCH

/// Checks count() over the `clerk` table, once grouped by `country` and once grouped by
/// (`country`, `gender`). Only the count column is projected, so the expected data lists
/// one value per group and no group keys.
TEST_F(ExecutorAggTestRunner, AggregationCount)
try
{
    /// Prepare some data
    std::shared_ptr<tipb::DAGRequest> request;
    auto agg_func0 = Count(col(col_name[0])); /// select count(age) from clerk group by country;
    auto agg_func1 = Count(col(col_name[1])); /// select count(gender) from clerk group by country, gender;
    std::vector<MockAstVec> agg_funcs = {{agg_func0}, {agg_func1}};

    auto group_by_expr0 = col(col_name[2]);
    auto group_by_expr10 = col(col_name[2]);
    auto group_by_expr11 = col(col_name[1]);

    /// The expected columns are non-nullable UInt64 (built with toVec, not toNullableVec).
    std::vector<ColumnsWithTypeAndName> expect_cols{
        {toVec<UInt64>("count(age)", ColumnWithUInt64{3, 3, 1, 0})},
        {toVec<UInt64>("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})}};
    std::vector<MockAstVec> group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}};
    std::vector<MockColumnNameVec> projections{{"count(age)"}, {"count(gender)"}};
    size_t test_num = expect_cols.size();
    /// The case lists are indexed in lockstep below; fail cleanly if they ever get out of sync.
    ASSERT_EQ(agg_funcs.size(), test_num);
    ASSERT_EQ(group_by_exprs.size(), test_num);
    ASSERT_EQ(projections.size(), test_num);

    /// Start to test
    for (size_t i = 0; i < test_num; ++i)
    {
        /// agg_funcs[i] already is a MockAstVec, so it is passed as is (no extra braces),
        /// the same way the max/min test passes its aggregate functions.
        request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]);
        executeWithConcurrency(request, expect_cols[i]);
    }
}
CATCH

// TODO: support more types for min, max and count.
// TODO: support more aggregation functions: sum, first_row, group_concat.

} // namespace tests
} // namespace DB
4 changes: 2 additions & 2 deletions dbms/src/Flash/tests/gtest_topn_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest
{toNullableVec<Int32>(col_name[0], col_age),
toNullableVec<String>(col_name[1], col_gender),
toNullableVec<String>(col_name[2], col_country),
toNullableVec<Int32>(col_name[3], c0l_salary)});
toNullableVec<Int32>(col_name[3], col_salary)});
}

std::shared_ptr<tipb::DAGRequest> buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num)
Expand Down Expand Up @@ -72,7 +72,7 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest
ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34};
ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"};
ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"};
ColumnWithInt32 c0l_salary{1300, 0, {}, 900, {}, -300};
ColumnWithInt32 col_salary{1300, 0, {}, 900, {}, -300};
};

TEST_F(ExecutorTopNTestRunner, TopN)
Expand Down
3 changes: 3 additions & 0 deletions dbms/src/TestUtils/mockExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,13 @@ MockWindowFrame buildDefaultRowsFrame();
#define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2))
#define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2))
#define NOT(expr) makeASTFunction("not", (expr))

// Aggregation functions
#define Max(expr) makeASTFunction("max", (expr))
#define Min(expr) makeASTFunction("min", (expr))
#define Count(expr) makeASTFunction("count", (expr))
#define Sum(expr) makeASTFunction("sum", (expr))

/// Window functions
#define RowNumber() makeASTFunction("RowNumber")
#define Rank() makeASTFunction("Rank")
Expand Down