Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-1071] Add tiny optimizations for hash aggregation functions #1072

Status: Merged (3 commits, merged on Aug 21, 2022; source and target branch names were not captured in this extract)
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 40 additions & 38 deletions native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc
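Original file line number Diff line number Diff line change
[Note: the +/- diff markers were lost in this extract. Where two near-identical lines appear back to back, the first is the removed (old) line and the second is the added (new) line; a new line with no preceding counterpart (e.g. the `int in_null_count = input_array->null_count();` declarations) is a pure addition. "Expand Up" / "Expand Down" lines are hunk boundaries, so the code between them is only a fragment of the enclosing function.]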
Expand Up @@ -50,14 +50,14 @@ struct FindAccumulatorType<I, arrow::enable_if_floating_point<I>> {
using Type = arrow::DoubleType;
};

arrow::Status ActionBase::Submit(ArrayList in, int max_group_id,
arrow::Status ActionBase::Submit(const ArrayList& in, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) {
return arrow::Status::NotImplemented("ActionBase Submit is abstract.");
}

arrow::Status ActionBase::Submit(
std::vector<std::shared_ptr<arrow::Array>> in,
const std::vector<std::shared_ptr<arrow::Array>>& in,
std::function<arrow::Status(uint64_t, uint64_t)>* on_valid,
std::function<arrow::Status()>* on_null) {
return arrow::Status::NotImplemented("ActionBase Submit is abstract.");
Expand Down Expand Up @@ -143,7 +143,7 @@ class UniqueAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -302,7 +302,7 @@ class CountAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -465,7 +465,7 @@ class CountDistinctAction : public ActionBase {
#endif
}
std::string getName() { return "CountDistinctAction"; }
arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -616,7 +616,7 @@ class CountLiteralAction : public ActionBase {
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -740,7 +740,7 @@ class MinAction<DataType, CType, precompile::enable_if_number<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1022,7 +1022,7 @@ class MinAction<DataType, CType, precompile::enable_if_decimal<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1179,7 +1179,7 @@ class MinAction<DataType, CType, precompile::enable_if_string_like<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1350,7 +1350,7 @@ class MaxAction<DataType, CType, precompile::enable_if_number<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1627,7 +1627,7 @@ class MaxAction<DataType, CType, precompile::enable_if_decimal<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1783,7 +1783,7 @@ class MaxAction<DataType, CType, precompile::enable_if_string_like<DataType>>
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -1956,7 +1956,7 @@ class SumAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2117,7 +2117,7 @@ class SumAction<DataType, CType, ResDataType, ResCType,

int RequiredColNum() { return 1; }

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2284,7 +2284,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,

int RequiredColNum() { return 1; }

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -2302,7 +2302,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
// prepare evaluate lambda
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_validity_[dest_group_id] = true;
cache_[dest_group_id] += data_[row_id];
Expand Down Expand Up @@ -2450,7 +2450,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -2467,7 +2467,7 @@ class SumActionPartial<DataType, CType, ResDataType, ResCType,
// prepare evaluate lambda
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_validity_[dest_group_id] = true;
cache_[dest_group_id] += in_->GetView(row_id);
Expand Down Expand Up @@ -2621,7 +2621,7 @@ class AvgAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2792,7 +2792,7 @@ class AvgAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -2979,7 +2979,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -3005,7 +3005,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
};
} else {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_sum_[dest_group_id] += data_[row_id];
cache_count_[dest_group_id] += 1;
Expand Down Expand Up @@ -3168,7 +3168,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand All @@ -3185,7 +3185,7 @@ class SumCountAction<DataType, CType, ResDataType, ResCType,
row_id = 0;
if (in_null_count_) {
*on_valid = [this](int dest_group_id) {
const bool is_null = in_null_count_ > 0 && in_->IsNull(row_id);
const bool is_null = in_->IsNull(row_id);
if (!is_null) {
cache_sum_[dest_group_id] += in_->GetView(row_id);
cache_count_[dest_group_id] += 1;
Expand Down Expand Up @@ -3353,7 +3353,7 @@ class SumCountMergeAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3534,7 +3534,7 @@ class SumCountMergeAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3704,7 +3704,7 @@ class AvgByCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -3884,7 +3884,7 @@ class AvgByCountAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4079,7 +4079,7 @@ class StddevSampPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4313,7 +4313,7 @@ class StddevSampPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4540,7 +4540,7 @@ class StddevSampFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4747,7 +4747,7 @@ class StddevSampFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -4986,7 +4986,7 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5059,8 +5059,9 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
return arrow::Status::OK();
}
auto input_array = std::make_shared<ArrayType>(in[0]);
int in_null_count = input_array->null_count();
for (int id = 0; id < input_array->length(); id++) {
if (input_array->IsNull(id)) {
if (in_null_count > 0 && input_array->IsNull(id)) {
if (ignore_nulls_) {
continue;
} else {
Expand Down Expand Up @@ -5224,7 +5225,7 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5296,8 +5297,9 @@ class FirstPartialAction<DataType, CType, ResDataType, ResCType,
return arrow::Status::OK();
}
auto input_array = std::make_shared<ArrayType>(in[0]);
int in_null_count = input_array->null_count();
for (int id = 0; id < input_array->length(); id++) {
if (input_array->IsNull(id)) {
if (in_null_count > 0 && input_array->IsNull(id)) {
if (ignore_nulls_) {
continue;
} else {
Expand Down Expand Up @@ -5459,7 +5461,7 @@ class FirstFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down Expand Up @@ -5719,7 +5721,7 @@ class FirstFinalAction<DataType, CType, ResDataType, ResCType,
#endif
}

arrow::Status Submit(ArrayList in_list, int max_group_id,
arrow::Status Submit(const ArrayList& in_list, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null) override {
// resize result data
Expand Down
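[Second changed file begins here. Its path and change-count header were not captured in this extract; the hunk below modifies the declarations of the `Submit` overloads inside `class ActionBase`, mirroring the signature changes made to the definitions above.]
Original file line number Diff line number Diff line change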
Expand Up @@ -40,10 +40,10 @@ class ActionBase {
public:
virtual ~ActionBase() {}

virtual arrow::Status Submit(ArrayList in, int max_group_id,
virtual arrow::Status Submit(const ArrayList& in, int max_group_id,
std::function<arrow::Status(int)>* on_valid,
std::function<arrow::Status()>* on_null);
virtual arrow::Status Submit(std::vector<std::shared_ptr<arrow::Array>> in,
virtual arrow::Status Submit(const std::vector<std::shared_ptr<arrow::Array>>& in,
std::function<arrow::Status(uint64_t, uint64_t)>* on_valid,
std::function<arrow::Status()>* on_null);
virtual arrow::Status Submit(const std::shared_ptr<arrow::Array>& in,
Expand Down