diff --git a/velox/functions/prestosql/ArrayConstructor.cpp b/velox/functions/prestosql/ArrayConstructor.cpp
index 965fcaccb27e..37902a86d5c4 100644
--- a/velox/functions/prestosql/ArrayConstructor.cpp
+++ b/velox/functions/prestosql/ArrayConstructor.cpp
@@ -55,24 +55,59 @@ class ArrayConstructor : public exec::VectorFunction {
     } else {
       elementsResult->resize(baseOffset + numArgs * rows.countSelected());
 
-      std::vector<BaseVector::CopyRange> ranges;
-      ranges.reserve(rows.end());
+      if (shouldCopyRanges(elementsResult->type())) {
+        // copyRanges is the faster path for this element type.
+        std::vector<BaseVector::CopyRange> ranges;
+        ranges.reserve(rows.end());
 
-      vector_size_t offset = baseOffset;
-      rows.applyToSelected([&](vector_size_t row) {
-        rawSizes[row] = numArgs;
-        rawOffsets[row] = offset;
-        ranges.push_back({row, offset, 1});
-        offset += numArgs;
-      });
+        vector_size_t offset = baseOffset;
+        rows.applyToSelected([&](vector_size_t row) {
+          rawSizes[row] = numArgs;
+          rawOffsets[row] = offset;
+          ranges.push_back({row, offset, 1});
+          offset += numArgs;
+        });
+
+        elementsResult->copyRanges(args[0].get(), ranges);
+
+        for (int i = 1; i < numArgs; i++) {
+          // Advance each target by one slot so argument i lands at offset + i.
+          for (auto& range : ranges) {
+            ++range.targetIndex;
+          }
+          elementsResult->copyRanges(args[i].get(), ranges);
+        }
+      } else {
+        // Otherwise use copy() with a target-to-source row mapping.
+        SelectivityVector targetRows(elementsResult->size(), false);
+        std::vector<vector_size_t> toSourceRow(elementsResult->size());
+
+        vector_size_t offset = baseOffset;
+        rows.applyToSelected([&](vector_size_t row) {
+          rawSizes[row] = numArgs;
+          rawOffsets[row] = offset;
+
+          targetRows.setValid(offset, true);
+          toSourceRow[offset] = row;
+
+          offset += numArgs;
+        });
+        targetRows.updateBounds();
+        elementsResult->copy(args[0].get(), targetRows, toSourceRow.data());
+
+        for (int i = 1; i < numArgs; i++) {
+          targetRows.clearAll();
 
-      elementsResult->copyRanges(args[0].get(), ranges);
+          vector_size_t targetOffset = baseOffset + i;
+          rows.applyToSelected([&](vector_size_t row) {
+            targetRows.setValid(targetOffset, true);
+            toSourceRow[targetOffset] = row;
+            targetOffset += numArgs;
+          });
 
-      for (int i = 1; i < numArgs; i++) {
-        for (auto& range : ranges) {
-          ++range.targetIndex;
+          targetRows.updateBounds();
+          elementsResult->copy(args[i].get(), targetRows, toSourceRow.data());
         }
-        elementsResult->copyRanges(args[i].get(), ranges);
       }
     }
   }
@@ -90,6 +125,31 @@ class ArrayConstructor : public exec::VectorFunction {
             .build(),
     };
   }
+
+ private:
+  // Selects the copy strategy for the elements vector. BaseVector::copyRanges
+  // is faster for arrays and maps and slower for primitive types.
+  //
+  // Returns false for a primitive 'type' and true for any other non-row type
+  // (e.g. array or map). For a row type, returns true if any child type
+  // (checked recursively) yields true, and false otherwise.
+  static bool shouldCopyRanges(const TypePtr& type) {
+    if (type->isPrimitiveType()) {
+      return false;
+    }
+
+    if (!type->isRow()) {
+      return true;
+    }
+
+    const auto& rowType = type->asRow();
+    for (const auto& child : rowType.children()) {
+      if (shouldCopyRanges(child)) {
+        return true;
+      }
+    }
+    return false;
+  }
 };
 
 } // namespace
diff --git a/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp b/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp
new file mode 100644
index 000000000000..8b8acdc2be80
--- /dev/null
+++ b/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/Benchmark.h>
+#include <folly/init/Init.h>
+
+#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
+#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
+#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
+
+using namespace facebook::velox;
+using namespace facebook::velox::exec;
+using namespace facebook::velox::functions;
+
+// Benchmarks array_constructor with one to three arguments over integer, row,
+// array and map element types, each with and without nulls in the inputs.
+int main(int argc, char** argv) {
+  folly::Init init(&argc, &argv);
+
+  functions::prestosql::registerArrayFunctions();
+
+  ExpressionBenchmarkBuilder benchmarkBuilder;
+
+  auto* pool = benchmarkBuilder.pool();
+  auto& vm = benchmarkBuilder.vectorMaker();
+
+  // Adds a benchmark set for 'type': three fuzzFlat columns (c0-c2), a null
+  // constant (n) and a constant wrapping row 0 of 'constantInput' (c).
+  auto createSet =
+      [&](const TypePtr& type, bool withNulls, const VectorPtr& constantInput) {
+        VectorFuzzer::Options options;
+        options.vectorSize = 1'000;
+        options.nullRatio = withNulls ? 0.2 : 0.0;
+
+        VectorFuzzer fuzzer(options, pool);
+        std::vector<VectorPtr> columns;
+        columns.push_back(fuzzer.fuzzFlat(type));
+        columns.push_back(fuzzer.fuzzFlat(type));
+        columns.push_back(fuzzer.fuzzFlat(type));
+        columns.push_back(
+            BaseVector::createNullConstant(type, options.vectorSize, pool));
+        columns.push_back(
+            BaseVector::wrapInConstant(options.vectorSize, 0, constantInput));
+
+        auto input = vm.rowVector({"c0", "c1", "c2", "n", "c"}, columns);
+
+        benchmarkBuilder
+            .addBenchmarkSet(
+                fmt::format(
+                    "array_constructor_{}_{}",
+                    mapTypeKindToName(type->kind()),
+                    withNulls ? "nulls" : "nullfree"),
+                input)
+            .addExpression("1", "array_constructor(c0)")
+            .addExpression("2", "array_constructor(c0, c1)")
+            .addExpression("3", "array_constructor(c0, c1, c2)")
+            .addExpression("2_null", "array_constructor(c0, c1, n)")
+            .addExpression("2_const", "array_constructor(c0, c1, c)");
+      };
+
+  auto constantInteger = BaseVector::createConstant(INTEGER(), 11, 1, pool);
+  createSet(INTEGER(), true, constantInteger);
+  createSet(INTEGER(), false, constantInteger);
+
+  auto constantRow = vm.rowVector({
+      BaseVector::createConstant(INTEGER(), 11, 1, pool),
+      BaseVector::createConstant(DOUBLE(), 1.23, 1, pool),
+  });
+  createSet(ROW({INTEGER(), DOUBLE()}), true, constantRow);
+  createSet(ROW({INTEGER(), DOUBLE()}), false, constantRow);
+
+  auto constantArray = vm.arrayVector<int32_t>({{1, 2, 3, 4, 5}});
+  createSet(ARRAY(INTEGER()), true, constantArray);
+  createSet(ARRAY(INTEGER()), false, constantArray);
+
+  auto constantMap = vm.mapVector<int32_t, float>({{{1, 1.23}, {2, 2.34}}});
+  createSet(MAP(INTEGER(), REAL()), true, constantMap);
+  createSet(MAP(INTEGER(), REAL()), false, constantMap);
+
+  benchmarkBuilder.registerBenchmarks();
+
+  folly::runBenchmarks();
+  return 0;
+}