Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(udf): array_combine & array_join #3945

Merged
merged 6 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions cases/query/udf_query.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,98 @@ cases:
- c1 bool
data: |
true, false
- id: array_join
mode: request-unsupport
sql: |
select
array_join(["1", "2"], ",") c1,
array_join(["1", "2"], "") c2,
array_join(["1", "2"], cast(null as string)) c3,
array_join(["1", NULL, "4", "5", NULL], "-") c4,
array_join(array<string>[], ",") as c5
expect:
columns:
- c1 string
- c2 string
- c3 string
- c4 string
- c5 string
rows:
- ["1,2", "12", "12", "1-4-5", ""]
- id: array_combine
mode: request-unsupport
sql: |
select
array_join(array_combine("-", ["1", "2"], ["3", "4"]), ",") c0,
expect:
columns:
- c0 string
rows:
- ["1-3,1-4,2-3,2-4"]

- id: array_combine_2
desc: array_combine casting array to array<string> first
mode: request-unsupport
sql: |
select
array_join(array_combine("-", [1, 2], [3, 4]), ",") c0,
array_join(array_combine("-", [1, 2], array<int64>[3], ["5", "6"]), ",") c1,
array_join(array_combine("|", ["1"], [timestamp(1717171200000), timestamp("2024-06-02 12:00:00")]), ",") c2,
array_join(array_combine("|", ["1"]), ",") c3,
expect:
columns:
- c0 string
- c1 string
- c2 string
- c3 string
rows:
- ["1-3,1-4,2-3,2-4", "1-3-5,1-3-6,2-3-5,2-3-6", "1|2024-06-01 00:00:00,1|2024-06-02 12:00:00", "1"]
- id: array_combine_3
desc: null values skipped
mode: request-unsupport
sql: |
select
array_join(array_combine("-", [1, NULL], [3, 4]), ",") c0,
array_join(array_combine("-", ARRAY<INT>[NULL], ["9", "8"]), ",") c1,
array_join(array_combine(string(NULL), ARRAY<INT>[1], ["9", "8"]), ",") c2,
expect:
columns:
- c0 string
- c1 string
- c2 string
rows:
- ["1-3,1-4", "", "19,18"]
- id: array_combine_4
desc: construct array from table
mode: request-unsupport
inputs:
- name: t1
columns: ["col1:int32", "std_ts:timestamp", "col2:string"]
indexs: ["index1:col1:std_ts"]
rows:
- [1, 1590115420001, "foo"]
- [2, 1590115420001, "bar"]
sql: |
select
col1,
array_join(array_combine("-", [col1, 10], [col2, "c2"]), ",") c0,
from t1
expect:
columns:
- col1 int32
- c0 string
rows:
- [1, "1-foo,1-c2,10-foo,10-c2"]
- [2, "2-bar,2-c2,10-bar,10-c2"]
- id: array_combine_err1
mode: request-unsupport
sql: |
select
array_join(array_combine("-"), ",") c0,
expect:
success: false
msg: |
Fail to resolve expression: array_join(array_combine(-), ,)

# ================================================================
# Map data type
Expand Down
62 changes: 62 additions & 0 deletions hybridse/src/base/cartesian_product.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Copyright (c) 2024 OpenMLDB authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "base/cartesian_product.h"

#include <algorithm>

#include "absl/types/span.h"

namespace hybridse {
namespace base {

// Computes the cartesian product of `lists`.
//
// Each returned row holds one element taken from every input list, in input order.
// Rows are ordered so that the element from the first list varies slowest and the
// element from the last list varies fastest, e.g. {{0, 1}, {0, 1}} yields
// {0, 0}, {0, 1}, {1, 0}, {1, 1}.
//
// Returns an empty result when `lists` itself is empty or when any of the lists is empty.
static auto cartesian_product(const std::vector<std::vector<int>>& lists) {
    std::vector<std::vector<int>> result;

    // Guard the empty input explicitly: the seeding step below reads lists[0].
    // Inner lists are inspected by const reference so that none of them is copied.
    if (lists.empty() ||
        std::any_of(lists.begin(), lists.end(), [](const std::vector<int>& list) { return list.empty(); })) {
        return result;
    }

    // Seed with one single-element row per entry of the first list.
    result.reserve(lists[0].size());
    for (const int value : lists[0]) {
        result.push_back({value});
    }

    // Extend every existing row with every element of the next list.
    for (size_t i = 1; i < lists.size(); ++i) {
        std::vector<std::vector<int>> extended;
        extended.reserve(result.size() * lists[i].size());
        for (const auto& prefix : result) {
            for (const int value : lists[i]) {
                extended.push_back(prefix);
                extended.back().push_back(value);
            }
        }
        // swap instead of copy-assign: the previous round's rows are no longer needed.
        result.swap(extended);
    }
    return result;
}

// Expands every size `n` in `vec` into the index list 0, 1, ..., n - 1 and returns the
// cartesian product of those index lists (see the vector-of-lists overload above).
std::vector<std::vector<int>> cartesian_product(absl::Span<int const> vec) {
    std::vector<std::vector<int>> index_lists;
    for (const int count : vec) {
        // `count` zero-initialized slots, then numbered in ascending order.
        std::vector<int> indices(count, 0);
        int next = 0;
        for (int& slot : indices) {
            slot = next;
            ++next;
        }
        index_lists.push_back(indices);
    }
    return cartesian_product(index_lists);
}

} // namespace base
} // namespace hybridse
32 changes: 32 additions & 0 deletions hybridse/src/base/cartesian_product.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Copyright (c) 2024 OpenMLDB authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef HYBRIDSE_SRC_BASE_CARTESIAN_PRODUCT_H_
#define HYBRIDSE_SRC_BASE_CARTESIAN_PRODUCT_H_

#include <vector>

#include "absl/types/span.h"

namespace hybridse {
namespace base {

// Returns the cartesian product of the index ranges described by `vec`.
//
// Every entry `n` of `vec` stands for the index list 0, 1, ..., n - 1. Each returned row
// contains one index per entry of `vec`, in the same order as `vec`; the index for the
// first entry varies slowest and the index for the last entry varies fastest.
// The result is empty if any entry of `vec` is 0.
// NOTE(review): entries are presumably expected to be non-negative and `vec` non-empty;
// confirm against callers before relying on other inputs.
std::vector<std::vector<int>> cartesian_product(absl::Span<int const> vec);

} // namespace base
} // namespace hybridse

#endif // HYBRIDSE_SRC_BASE_CARTESIAN_PRODUCT_H_
115 changes: 115 additions & 0 deletions hybridse/src/codegen/array_ir_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@

#include <string>

#include "absl/strings/substitute.h"
#include "base/fe_status.h"
#include "codegen/cast_expr_ir_builder.h"
#include "codegen/context.h"
#include "codegen/ir_base_builder.h"
#include "codegen/string_ir_builder.h"

namespace hybridse {
namespace codegen {
Expand Down Expand Up @@ -122,5 +126,116 @@
return true;
}

// Element extraction is not implemented for arrays: the arguments are ignored and an
// Unimplemented status is always returned.
absl::StatusOr<NativeValue> ArrayIRBuilder::ExtractElement(CodeGenContextBase* ctx, const NativeValue& arr,
                                                           const NativeValue& key) const {
    absl::Status unsupported = absl::UnimplementedError("array extract element");
    return unsupported;
}

// Loads the size field (SZ_IDX) of the array struct `arr` in the current block of `ctx`.
// Returns the loaded value, or an Internal error if the load could not be generated.
absl::StatusOr<llvm::Value*> ArrayIRBuilder::NumElements(CodeGenContextBase* ctx, llvm::Value* arr) const {
    llvm::Value* size_value = nullptr;
    const bool loaded = Load(ctx->GetCurrentBlock(), arr, SZ_IDX, &size_value);
    if (loaded) {
        return size_value;
    }
    return absl::InternalError("codegen: fail to extract array size");
}

// Generates code that converts the array value `src` into an array<string> value.
//
// `src` must be a value whose type resolves to an ArrayIRBuilder; otherwise InvalidArgument is
// returned. If the source element type is already a string pointer, `src` is returned unchanged.
// Otherwise a new array is created through this builder, sized like the source, and filled by an
// emitted loop that casts every source element to string and copies its null flag.
//
// This builder must itself describe array<string>, since Initialize() rejects any other element type.
// Returns the newly built array value, or the first error raised while generating code.
absl::StatusOr<llvm::Value*> ArrayIRBuilder::CastToArrayString(CodeGenContextBase* ctx, llvm::Value* src) {
    auto sb = StructTypeIRBuilder::CreateStructTypeIRBuilder(ctx->GetModule(), src->getType());
    CHECK_ABSL_STATUSOR(sb);

    ArrayIRBuilder* src_builder = dynamic_cast<ArrayIRBuilder*>(sb.value().get());
    if (!src_builder) {
        return absl::InvalidArgumentError("input value not a array");
    }

    llvm::Type* src_ele_type = src_builder->element_type_;
    if (IsStringPtr(src_ele_type)) {
        // already array<string>
        return src;
    }

    // Fields of the source array: raw element buffer, null flags and element count.
    auto fields = src_builder->Load(ctx, src);
    CHECK_ABSL_STATUSOR(fields);
    llvm::Value* src_raws = fields.value().at(RAW_IDX);
    llvm::Value* src_nulls = fields.value().at(NULL_IDX);
    llvm::Value* num_elements = fields.value().at(SZ_IDX);

    llvm::Value* casted = nullptr;
    if (!CreateDefault(ctx->GetCurrentBlock(), &casted)) {
        return absl::InternalError("codegen error: fail to construct default array");
    }
    // initialize each element
    CHECK_ABSL_STATUS(Initialize(ctx, casted, {num_elements}));

    auto builder = ctx->GetBuilder();
    auto dst_fields = Load(ctx, casted);
    // Validate the destination load itself before dereferencing it with value().
    CHECK_ABSL_STATUSOR(dst_fields);
    auto* raw_array_ptr = dst_fields.value().at(RAW_IDX);
    auto* nullables_ptr = dst_fields.value().at(NULL_IDX);

    // Loop counter lives in a stack slot so the emitted while-loop can update it.
    llvm::Type* idx_type = builder->getInt64Ty();
    llvm::Value* idx = builder->CreateAlloca(idx_type);
    builder->CreateStore(builder->getInt64(0), idx);
    CHECK_STATUS_TO_ABSL(ctx->CreateWhile(
        [&](llvm::Value** cond) -> base::Status {
            // continue while idx < num_elements
            *cond = builder->CreateICmpSLT(builder->CreateLoad(idx_type, idx), num_elements);
            return {};
        },
        [&]() -> base::Status {
            llvm::Value* idx_val = builder->CreateLoad(idx_type, idx);

            // Source element at idx and the destination element slot at idx.
            llvm::Value* src_ele_value =
                builder->CreateLoad(src_ele_type, builder->CreateGEP(src_ele_type, src_raws, idx_val));
            llvm::Value* dst_ele =
                builder->CreateLoad(element_type_, builder->CreateGEP(element_type_, raw_array_ptr, idx_val));

            // Cast the source element to string, writing into the destination element.
            codegen::StringIRBuilder str_builder(ctx->GetModule());
            auto s = str_builder.CastFrom(ctx->GetCurrentBlock(), src_ele_value, dst_ele);
            CHECK_TRUE(s.ok(), common::kCodegenError, s.ToString());

            // Carry the null flag of the element over unchanged.
            builder->CreateStore(
                builder->CreateLoad(builder->getInt1Ty(), builder->CreateGEP(builder->getInt1Ty(), src_nulls, idx_val)),
                builder->CreateGEP(builder->getInt1Ty(), nullables_ptr, idx_val));

            // ++idx
            builder->CreateStore(builder->CreateAdd(idx_val, builder->getInt64(1)), idx);
            return {};
        }));

    CHECK_ABSL_STATUS(Set(ctx, casted, {raw_array_ptr, nullables_ptr, num_elements}));
    return casted;
}

// Emits the call that initializes the array<string> struct pointed to by `alloca` for a given size.
//
// Only array<string> is supported: `alloca` must be a pointer to this builder's struct type and
// this builder's element type must be a pointer to the string struct type, otherwise Unimplemented
// is returned.
// `args` must contain exactly one integer value, the array size. A size that is not already int64
// is cast to int64 first; a failed cast is returned as an error.
// The actual work is delegated to the external function `hybridse_alloc_array_string(array*, int64)`,
// which is declared in the module on demand.
absl::Status ArrayIRBuilder::Initialize(CodeGenContextBase* ctx, ::llvm::Value* alloca,
                                        absl::Span<llvm::Value* const> args) const {
    auto* builder = ctx->GetBuilder();
    StringIRBuilder str_builder(ctx->GetModule());
    auto ele_type = str_builder.GetType();
    if (!alloca->getType()->isPointerTy() || alloca->getType()->getPointerElementType() != struct_type_ ||
        ele_type->getPointerTo() != element_type_) {
        return absl::UnimplementedError(absl::Substitute(
            "not able to Initialize array except array<string>, got type $0", GetLlvmObjectString(alloca->getType())));
    }
    if (args.size() != 1) {
        // require one argument that is array size
        return absl::InvalidArgumentError("initialize array requires one argument which is array size");
    }
    if (!args[0]->getType()->isIntegerTy()) {
        return absl::InvalidArgumentError("array size argument should be integer");
    }
    auto sz = args[0];
    if (sz->getType() != builder->getInt64Ty()) {
        // The runtime allocator takes an int64 size; widen or narrow other integer types.
        CastExprIRBuilder cast_builder(ctx->GetCurrentBlock());
        base::Status s;
        cast_builder.SafeCastNumber(sz, builder->getInt64Ty(), &sz, s);
        CHECK_STATUS_TO_ABSL(s);
    }
    auto fn = ctx->GetModule()->getOrInsertFunction("hybridse_alloc_array_string", builder->getVoidTy(),
                                                    struct_type_->getPointerTo(), builder->getInt64Ty());

    builder->CreateCall(fn, {alloca, sz});
    return absl::OkStatus();
}
} // namespace codegen
} // namespace hybridse
14 changes: 12 additions & 2 deletions hybridse/src/codegen/array_ir_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,21 @@ class ArrayIRBuilder : public StructTypeIRBuilder {
CHECK_TRUE(false, common::kCodegenError, "casting to array un-implemented");
};

private:
void InitStructType() override;
absl::StatusOr<llvm::Value*> CastToArrayString(CodeGenContextBase* ctx, llvm::Value* src);

absl::StatusOr<NativeValue> ExtractElement(CodeGenContextBase* ctx, const NativeValue& arr,
const NativeValue& key) const override;

absl::StatusOr<llvm::Value*> NumElements(CodeGenContextBase* ctx, llvm::Value* arr) const override;

bool CreateDefault(::llvm::BasicBlock* block, ::llvm::Value** output) override;

absl::Status Initialize(CodeGenContextBase* ctx, ::llvm::Value* alloca,
absl::Span<llvm::Value* const> args) const override;

private:
void InitStructType() override;

private:
::llvm::Type* element_type_ = nullptr;
};
Expand Down
20 changes: 18 additions & 2 deletions hybridse/src/codegen/ir_base_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,12 @@
if (type_pointee->isStructTy()) {
auto* key_type = type_pointee->getStructElementType(1);
const node::TypeNode* key = nullptr;
if (key_type->isPointerTy() && !GetFullType(nm, key_type->getPointerElementType(), &key)) {
if (!key_type->isPointerTy() || !GetFullType(nm, key_type->getPointerElementType(), &key)) {
return false;
}
const node::TypeNode* value = nullptr;
auto* value_type = type_pointee->getStructElementType(2);
if (value_type->isPointerTy() && !GetFullType(nm, value_type->getPointerElementType(), &value)) {
if (!value_type->isPointerTy() || !GetFullType(nm, value_type->getPointerElementType(), &value)) {
return false;
}

Expand All @@ -590,6 +590,22 @@
}
return false;
}
case hybridse::node::kArray: {
if (type->isPointerTy()) {
auto type_pointee = type->getPointerElementType();
if (type_pointee->isStructTy()) {
auto* key_type = type_pointee->getStructElementType(0);
const node::TypeNode* key = nullptr;
if (!key_type->isPointerTy() || !GetFullType(nm, key_type->getPointerElementType(), &key)) {
return false;

Check warning on line 600 in hybridse/src/codegen/ir_base_builder.cc

View check run for this annotation

Codecov / codecov/patch

hybridse/src/codegen/ir_base_builder.cc#L600

Added line #L600 was not covered by tests
}

*type_node = nm->MakeNode<node::TypeNode>(node::DataType::kArray, key);
return true;
}
}
return false;

Check warning on line 607 in hybridse/src/codegen/ir_base_builder.cc

View check run for this annotation

Codecov / codecov/patch

hybridse/src/codegen/ir_base_builder.cc#L607

Added line #L607 was not covered by tests
}
default: {
*type_node = nm->MakeTypeNode(base);
return true;
Expand Down
Loading
Loading