From 334f3dca872ec774c5a7b550b5a2694e15d91921 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Fri, 15 Sep 2023 04:11:43 -0700 Subject: [PATCH] Optimize array_constructor (#6568) Summary: array_constructor is very slow: https://github.com/facebookincubator/velox/discussions/5958#discussioncomment-6615155a array_constructor uses BaseVector::copyRanges, which is somewhat fast for arrays and maps, but very slow for primitive types: ``` FlatVector.h void copyRanges( const BaseVector* source, const folly::Range& ranges) override { for (auto& range : ranges) { copy(source, range.targetIndex, range.sourceIndex, range.count); } } ``` FlatVector::copy(source, rows, toSourceRow) is faster. Switching from copyRanges to copy speeds up array_constructor for primitive types and structs significantly. Yet, this change makes arrays and maps slower. The slowness is due to ArrayVector and MapVector not having implementation for copy(source, rows, toSourceRow). They rely on BaseVector::copy to translate rows + toSourceRow to ranges. This extra processing causes perf regression. Hence, we use copy for primitive types and structs of these and copyRanges for everything else. ``` Before: array_constructor_ARRAY_nullfree##1 16.80ms 59.53 array_constructor_ARRAY_nullfree##2 27.02ms 37.01 array_constructor_ARRAY_nullfree##3 38.03ms 26.30 array_constructor_ARRAY_nullfree##2_null 52.86ms 18.92 array_constructor_ARRAY_nullfree##2_const 54.97ms 18.19 array_constructor_ARRAY_nulls##1 30.61ms 32.66 array_constructor_ARRAY_nulls##2 55.01ms 18.18 array_constructor_ARRAY_nulls##3 80.69ms 12.39 array_constructor_ARRAY_nulls##2_null 69.10ms 14.47 array_constructor_ARRAY_nulls##2_const 103.85ms 9.63 After: array_constructor_ARRAY_nullfree##1 15.25ms 65.58 array_constructor_ARRAY_nullfree##2 25.11ms 39.82 array_constructor_ARRAY_nullfree##3 34.59ms 28.91 array_constructor_ARRAY_nullfree##2_null 53.61ms 18.65 array_constructor_ARRAY_nullfree##2_const 51.48ms 19.42 array_constructor_ARRAY_nulls##1 29.99ms 33.34 array_constructor_ARRAY_nulls##2 55.91ms 17.89 array_constructor_ARRAY_nulls##3 81.73ms 12.24 array_constructor_ARRAY_nulls##2_null 66.97ms 14.93 array_constructor_ARRAY_nulls##2_const 92.96ms 10.76 Before: array_constructor_INTEGER_nullfree##1 19.72ms 50.71 array_constructor_INTEGER_nullfree##2 34.51ms 28.97 array_constructor_INTEGER_nullfree##3 47.95ms 20.86 array_constructor_INTEGER_nullfree##2_null 58.68ms 17.04 array_constructor_INTEGER_nullfree##2_const 45.15ms 22.15 array_constructor_INTEGER_nulls##1 29.99ms 33.34 array_constructor_INTEGER_nulls##2 55.32ms 18.08 array_constructor_INTEGER_nulls##3 78.53ms 12.73 array_constructor_INTEGER_nulls##2_null 72.24ms 13.84 array_constructor_INTEGER_nulls##2_const 71.13ms 14.06 After: array_constructor_INTEGER_nullfree##1 3.39ms 294.89 array_constructor_INTEGER_nullfree##2 7.35ms 136.10 array_constructor_INTEGER_nullfree##3 10.78ms 92.74 array_constructor_INTEGER_nullfree##2_null 11.29ms 88.57 array_constructor_INTEGER_nullfree##2_const 10.14ms 98.65 array_constructor_INTEGER_nulls##1 4.49ms 222.53 array_constructor_INTEGER_nulls##2 9.78ms 102.29 array_constructor_INTEGER_nulls##3 14.69ms 68.08 array_constructor_INTEGER_nulls##2_null 12.14ms 82.36 array_constructor_INTEGER_nulls##2_const 12.27ms 81.53 Before: array_constructor_MAP_nullfree##1 17.34ms 57.65 array_constructor_MAP_nullfree##2 29.84ms 33.51 array_constructor_MAP_nullfree##3 41.51ms 24.09 array_constructor_MAP_nullfree##2_null 56.57ms 17.68 array_constructor_MAP_nullfree##2_const 71.68ms 13.95 array_constructor_MAP_nulls##1 36.22ms 27.61 array_constructor_MAP_nulls##2 68.18ms 14.67 array_constructor_MAP_nulls##3 95.12ms 10.51 array_constructor_MAP_nulls##2_null 86.42ms 11.57 array_constructor_MAP_nulls##2_const 120.10ms 8.33 After: array_constructor_MAP_nullfree##1 17.05ms 58.66 array_constructor_MAP_nullfree##2 28.42ms 35.18 array_constructor_MAP_nullfree##3 36.96ms 27.06 array_constructor_MAP_nullfree##2_null 55.64ms 17.97 array_constructor_MAP_nullfree##2_const 67.53ms 14.81 array_constructor_MAP_nulls##1 32.91ms 30.39 array_constructor_MAP_nulls##2 64.50ms 15.50 array_constructor_MAP_nulls##3 95.71ms 10.45 array_constructor_MAP_nulls##2_null 77.22ms 12.95 array_constructor_MAP_nulls##2_const 114.91ms 8.70 Before: array_constructor_ROW_nullfree##1 33.88ms 29.52 array_constructor_ROW_nullfree##2 62.00ms 16.13 array_constructor_ROW_nullfree##3 89.54ms 11.17 array_constructor_ROW_nullfree##2_null 78.46ms 12.75 array_constructor_ROW_nullfree##2_const 95.53ms 10.47 array_constructor_ROW_nulls##1 44.11ms 22.67 array_constructor_ROW_nulls##2 115.43ms 8.66 array_constructor_ROW_nulls##3 173.61ms 5.76 array_constructor_ROW_nulls##2_null 130.40ms 7.67 array_constructor_ROW_nulls##2_const 169.97ms 5.88 After: array_constructor_ROW_nullfree##1 5.55ms 180.15 array_constructor_ROW_nullfree##2 12.83ms 77.94 array_constructor_ROW_nullfree##3 18.89ms 52.95 array_constructor_ROW_nullfree##2_null 18.74ms 53.36 array_constructor_ROW_nullfree##2_const 18.16ms 55.07 array_constructor_ROW_nulls##1 11.29ms 88.61 array_constructor_ROW_nulls##2 18.57ms 53.86 array_constructor_ROW_nulls##3 34.20ms 29.24 array_constructor_ROW_nulls##2_null 25.05ms 39.92 array_constructor_ROW_nulls##2_const 25.15ms 39.77 ``` Differential Revision: D49272797 --- .../functions/prestosql/ArrayConstructor.cpp | 81 +++++++++++++--- .../benchmarks/ArrayConstructorBenchmark.cpp | 94 +++++++++++++++++++ 2 files changed, 161 insertions(+), 14 deletions(-) create mode 100644 velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp diff --git a/velox/functions/prestosql/ArrayConstructor.cpp b/velox/functions/prestosql/ArrayConstructor.cpp index 965fcaccb27e2..29acd96b99c90 100644 --- a/velox/functions/prestosql/ArrayConstructor.cpp +++ b/velox/functions/prestosql/ArrayConstructor.cpp @@ -55,24 +55,55 @@ class ArrayConstructor : public exec::VectorFunction { } else { elementsResult->resize(baseOffset + numArgs * rows.countSelected()); - std::vector ranges; - ranges.reserve(rows.end()); + if (shouldCopyRanges(elementsResult->type())) { + std::vector ranges; + ranges.reserve(rows.end()); - vector_size_t offset = baseOffset; - rows.applyToSelected([&](vector_size_t row) { - rawSizes[row] = numArgs; - rawOffsets[row] = offset; - ranges.push_back({row, offset, 1}); - offset += numArgs; - }); + vector_size_t offset = baseOffset; + rows.applyToSelected([&](vector_size_t row) { + rawSizes[row] = numArgs; + rawOffsets[row] = offset; + ranges.push_back({row, offset, 1}); + offset += numArgs; + }); - elementsResult->copyRanges(args[0].get(), ranges); + elementsResult->copyRanges(args[0].get(), ranges); - for (int i = 1; i < numArgs; i++) { - for (auto& range : ranges) { - ++range.targetIndex; + for (int i = 1; i < numArgs; i++) { + for (auto& range : ranges) { + ++range.targetIndex; + } + elementsResult->copyRanges(args[i].get(), ranges); + } + } else { + SelectivityVector targetRows(elementsResult->size(), false); + std::vector toSourceRow(elementsResult->size()); + + vector_size_t offset = baseOffset; + rows.applyToSelected([&](vector_size_t row) { + rawSizes[row] = numArgs; + rawOffsets[row] = offset; + + targetRows.setValid(offset, true); + toSourceRow[offset] = row; + + offset += numArgs; + }); + targetRows.updateBounds(); + elementsResult->copy(args[0].get(), targetRows, toSourceRow.data()); + + for (int i = 1; i < numArgs; i++) { + targetRows.clearAll(); + vector_size_t offset = baseOffset; + rows.applyToSelected([&](vector_size_t row) { + targetRows.setValid(offset + i, true); + toSourceRow[offset + i] = row; + offset += numArgs; + }); + + targetRows.updateBounds(); + elementsResult->copy(args[i].get(), targetRows, toSourceRow.data()); } - elementsResult->copyRanges(args[i].get(), ranges); } } } @@ -90,6 +121,28 @@ class ArrayConstructor : public exec::VectorFunction { .build(), }; } + + private: + // BaseVector::copyRange is faster for arrays and maps and slower for + // primitive types. Check if 'type' is an array or map or contains an array or + // map. If so, return true, otherwise, false. + static bool shouldCopyRanges(const TypePtr& type) { + if (type->isPrimitiveType()) { + return false; + } + + if (!type->isRow()) { + return true; + } + + const auto& rowType = type->asRow(); + for (const auto& child : rowType.children()) { + if (shouldCopyRanges(child)) { + return true; + } + } + return false; + } }; } // namespace diff --git a/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp b/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp new file mode 100644 index 0000000000000..b9f3094f70b22 --- /dev/null +++ b/velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#include "velox/benchmarks/ExpressionBenchmarkBuilder.h" +#include "velox/functions/lib/LambdaFunctionUtil.h" +#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h" +#include "velox/functions/prestosql/ArrayFunctions.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" + +using namespace facebook::velox; +using namespace facebook::velox::exec; +using namespace facebook::velox::functions; + +int main(int argc, char** argv) { + folly::init(&argc, &argv); + + functions::prestosql::registerArrayFunctions(); + + ExpressionBenchmarkBuilder benchmarkBuilder; + + auto* pool = benchmarkBuilder.pool(); + auto& vm = benchmarkBuilder.vectorMaker(); + + auto createSet = + [&](const TypePtr& type, bool withNulls, const VectorPtr& constantInput) { + VectorFuzzer::Options options; + options.vectorSize = 1'000; + options.nullRatio = withNulls ? 0.2 : 0.0; + + VectorFuzzer fuzzer(options, pool); + std::vector columns; + columns.push_back(fuzzer.fuzzFlat(type)); + columns.push_back(fuzzer.fuzzFlat(type)); + columns.push_back(fuzzer.fuzzFlat(type)); + columns.push_back( + BaseVector::createNullConstant(type, options.vectorSize, pool)); + columns.push_back( + BaseVector::wrapInConstant(options.vectorSize, 0, constantInput)); + + auto input = vm.rowVector({"c0", "c1", "c2", "n", "c"}, columns); + + benchmarkBuilder + .addBenchmarkSet( + fmt::format( + "array_constructor_{}_{}", + mapTypeKindToName(type->kind()), + withNulls ? "nulls" : "nullfree"), + input) + .addExpression("1", "array_constructor(c0)") + .addExpression("2", "array_constructor(c0, c1)") + .addExpression("3", "array_constructor(c0, c1, c2)") + .addExpression("2_null", "array_constructor(c0, c1, n)") + .addExpression("2_const", "array_constructor(c0, c1, c)"); + }; + + auto constantInteger = BaseVector::createConstant(INTEGER(), 11, 1, pool); + createSet(INTEGER(), true, constantInteger); + createSet(INTEGER(), false, constantInteger); + + auto constantRow = vm.rowVector({ + BaseVector::createConstant(INTEGER(), 11, 1, pool), + BaseVector::createConstant(DOUBLE(), 1.23, 1, pool), + }); + createSet(ROW({INTEGER(), DOUBLE()}), true, constantRow); + createSet(ROW({INTEGER(), DOUBLE()}), false, constantRow); + + auto constantArray = vm.arrayVector({{1, 2, 3, 4, 5}}); + createSet(ARRAY(INTEGER()), true, constantArray); + createSet(ARRAY(INTEGER()), false, constantArray); + + auto constantMap = vm.mapVector({{{1, 1.23}, {2, 2.34}}}); + createSet(MAP(INTEGER(), REAL()), true, constantMap); + createSet(MAP(INTEGER(), REAL()), false, constantMap); + + benchmarkBuilder.registerBenchmarks(); + + folly::runBenchmarks(); + return 0; +}