Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
ColumnarWSCG optimization: only GetValue when field is used
Browse files Browse the repository at this point in the history
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
  • Loading branch information
xuechendi committed Jan 8, 2021
1 parent be24bcd commit 8cbe372
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 329 deletions.
10 changes: 3 additions & 7 deletions cpp/src/codegen/arrow_compute/ext/array_item_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,14 @@ namespace extra {
// Small value type holding two 16-bit fields, `id` and `array_id`.
// NOTE(review): presumably `array_id` selects an array and `id` an element inside it —
// confirm against the callers, which are not visible here.
// NOTE(review): this span is a rendered diff without +/- markers. The `valid` member and
// the three constructors that initialise it look like the removed side; the last two
// constructors (which no longer mention `valid`) look like the added side — confirm
// against the repository before treating this text as a single definition.
struct ArrayItemIndex {
// `id` is declared before `array_id`, so it is initialised first no matter how the
// constructors below order their initialiser lists (they all write `array_id` first).
uint16_t id = 0;
uint16_t array_id = 0;
bool valid = true;
// Constructors that also set `valid`.
ArrayItemIndex() : array_id(0), id(0), valid(true) {}
// Single-argument and not `explicit`, so a bool converts implicitly to ArrayItemIndex.
ArrayItemIndex(bool valid) : array_id(0), id(0), valid(valid) {}
ArrayItemIndex(uint16_t array_id, uint16_t id)
: array_id(array_id), id(id), valid(true) {}
// Constructors that set only the two index fields.
ArrayItemIndex() : array_id(0), id(0) {}
ArrayItemIndex(uint16_t array_id, uint16_t id) : array_id(array_id), id(id) {}
};
// Value type with the same two 16-bit fields as ArrayItemIndex (`id`, `array_id`) and no
// validity flag.
// NOTE(review): this span is a rendered diff without +/- markers. The two-argument
// constructor appears twice — once wrapped over two lines and once on a single line —
// which looks like a formatting-only change; confirm against the repository.
struct ArrayItemIndexS {
// `id` is declared before `array_id`, so it is initialised first even though the
// initialiser lists below write `array_id` first.
uint16_t id = 0;
uint16_t array_id = 0;
// Default: both fields zero (same values as the in-class initialisers above).
ArrayItemIndexS() : array_id(0), id(0) {}
ArrayItemIndexS(uint16_t array_id, uint16_t id)
: array_id(array_id), id(id) {}
ArrayItemIndexS(uint16_t array_id, uint16_t id) : array_id(array_id), id(id) {}
};

} // namespace extra
Expand Down
78 changes: 36 additions & 42 deletions cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,20 @@ class ProjectKernel::Impl {

std::string GetSignature() { return signature_; }

arrow::Status DoCodeGen(int level, const std::vector<std::string> input,
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
arrow::Status DoCodeGen(
int level,
std::vector<std::pair<std::pair<std::string, std::string>, gandiva::DataTypePtr>>
input,
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();
int idx = 0;
for (auto project : project_list_) {
std::shared_ptr<ExpressionCodegenVisitor> project_node_visitor;
std::vector<std::string> input_list;
std::vector<int> indices_list;
RETURN_NOT_OK(MakeExpressionCodegenVisitor(project, input, {input_field_list_}, -1,
var_id, &input_list,
auto is_local = false;
RETURN_NOT_OK(MakeExpressionCodegenVisitor(project, &input, {input_field_list_}, -1,
var_id, is_local, &input_list,
&project_node_visitor));
codegen_ctx->process_codes += project_node_visitor->GetPrepare();
auto name = project_node_visitor->GetResult();
Expand All @@ -83,26 +87,19 @@ class ProjectKernel::Impl {
auto output_name =
"project_" + std::to_string(level) + "_output_col_" + std::to_string(idx++);
auto output_validity = output_name + "_validity";
codegen_ctx->output_list.push_back(
std::make_pair(output_name, project->return_type()));
std::stringstream output_get_ss;
output_get_ss << "auto " << output_name << " = " << name << ";" << std::endl;
output_get_ss << "auto " << output_validity << " = " << validity << ";"
<< std::endl;

codegen_ctx->output_list.push_back(std::make_pair(
std::make_pair(output_name, output_get_ss.str()), project->return_type()));
for (auto header : project_node_visitor->GetHeaders()) {
if (std::find(codegen_ctx->header_codes.begin(), codegen_ctx->header_codes.end(),
header) == codegen_ctx->header_codes.end()) {
codegen_ctx->header_codes.push_back(header);
}
}

std::stringstream process_ss;
std::stringstream define_ss;

process_ss << output_name << " = " << name << ";" << std::endl;
process_ss << output_validity << " = " << validity << ";" << std::endl;
codegen_ctx->process_codes += process_ss.str();

define_ss << GetCTypeString(project->return_type()) << " " << output_name << ";"
<< std::endl;
define_ss << "bool " << output_validity << ";" << std::endl;
codegen_ctx->definition_codes += define_ss.str();
}
*codegen_ctx_out = codegen_ctx;
return arrow::Status::OK();
Expand Down Expand Up @@ -139,9 +136,11 @@ arrow::Status ProjectKernel::MakeResultIterator(

std::string ProjectKernel::GetSignature() { return impl_->GetSignature(); }

// Public entry point for ProjectKernel code generation. It does no work of its own: every
// argument is forwarded unchanged to impl_->DoCodeGen and that call's status is returned.
// NOTE(review): two signatures appear back to back because this is a rendered diff without
// +/- markers. The first (input as std::vector<std::string>) looks like the pre-change
// form and the second (input as a vector of ((string, string), DataTypePtr) pairs) the
// post-change form — confirm against the repository.
arrow::Status ProjectKernel::DoCodeGen(int level, std::vector<std::string> input,
std::shared_ptr<CodeGenContext>* codegen_ctx,
int* var_id) {
arrow::Status ProjectKernel::DoCodeGen(
int level,
std::vector<std::pair<std::pair<std::string, std::string>, gandiva::DataTypePtr>>
input,
std::shared_ptr<CodeGenContext>* codegen_ctx, int* var_id) {
// `input` is received by value here and handed on as an lvalue, so it is copied again if
// the implementation also takes it by value.
return impl_->DoCodeGen(level, input, codegen_ctx, var_id);
}

Expand All @@ -166,14 +165,18 @@ class FilterKernel::Impl {

std::string GetSignature() { return signature_; }

arrow::Status DoCodeGen(int level, const std::vector<std::string> input,
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
arrow::Status DoCodeGen(
int level,
std::vector<std::pair<std::pair<std::string, std::string>, gandiva::DataTypePtr>>
input,
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();
std::shared_ptr<ExpressionCodegenVisitor> condition_node_visitor;
std::vector<std::string> input_list;
std::vector<int> indices_list;
RETURN_NOT_OK(MakeExpressionCodegenVisitor(condition_, input, {input_field_list_}, -1,
var_id, &input_list,
auto is_local = false;
RETURN_NOT_OK(MakeExpressionCodegenVisitor(condition_, &input, {input_field_list_},
-1, var_id, is_local, &input_list,
&condition_node_visitor));
codegen_ctx->process_codes += condition_node_visitor->GetPrepare();
for (auto header : condition_node_visitor->GetHeaders()) {
Expand All @@ -185,27 +188,16 @@ class FilterKernel::Impl {

auto condition_codes = condition_node_visitor->GetResult();
std::stringstream process_ss;
std::stringstream define_ss;
process_ss << "if (!(" << condition_codes << ")) {" << std::endl;
process_ss << "continue;" << std::endl;
process_ss << "}" << std::endl;
int idx = 0;
for (auto field : input_field_list_) {
auto output_name =
"filter_" + std::to_string(level) + "_output_col_" + std::to_string(idx);
auto output_validity = output_name + "_validity";
codegen_ctx->output_list.push_back(std::make_pair(output_name, field->type()));

define_ss << GetCTypeString(field->type()) << " " << output_name << ";"
<< std::endl;
define_ss << "bool " << output_validity << ";" << std::endl;

process_ss << output_name << " = " << input[idx] << ";" << std::endl;
process_ss << output_validity << " = " << input[idx] << "_validity"
<< ";" << std::endl;
codegen_ctx->output_list.push_back(
std::make_pair(std::make_pair(input[idx].first.first, input[idx].first.second),
field->type()));
idx++;
}
codegen_ctx->definition_codes += define_ss.str();
codegen_ctx->process_codes += process_ss.str();

*codegen_ctx_out = codegen_ctx;
Expand Down Expand Up @@ -244,9 +236,11 @@ arrow::Status FilterKernel::MakeResultIterator(

std::string FilterKernel::GetSignature() { return impl_->GetSignature(); }

arrow::Status FilterKernel::DoCodeGen(int level, std::vector<std::string> input,
std::shared_ptr<CodeGenContext>* codegen_ctx,
int* var_id) {
// Public entry point for FilterKernel code generation. It does no work of its own: every
// argument is forwarded unchanged to impl_->DoCodeGen and that call's status is returned.
// NOTE(review): this definition sits in a rendered diff without +/- markers; it looks like
// the post-change form (input as a vector of ((string, string), DataTypePtr) pairs) —
// confirm against the repository.
arrow::Status FilterKernel::DoCodeGen(
int level,
std::vector<std::pair<std::pair<std::string, std::string>, gandiva::DataTypePtr>>
input,
std::shared_ptr<CodeGenContext>* codegen_ctx, int* var_id) {
// `input` is received by value here and handed on as an lvalue, so it is copied again if
// the implementation also takes it by value.
return impl_->DoCodeGen(level, input, codegen_ctx, var_id);
}

Expand Down
4 changes: 3 additions & 1 deletion cpp/src/codegen/arrow_compute/ext/codegen_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ struct CodeGenContext {
std::string finish_codes;
std::string definition_codes;
std::vector<std::string> function_list;
std::vector<std::pair<std::string, std::shared_ptr<arrow::DataType>>> output_list;
std::vector<
std::pair<std::pair<std::string, std::string>, std::shared_ptr<arrow::DataType>>>
output_list;
};
Loading

0 comments on commit 8cbe372

Please sign in to comment.