Skip to content

Commit

Permalink
Optimize array_constructor (facebookincubator#6568)
Browse files Browse the repository at this point in the history
Summary:

array_constructor is very slow: facebookincubator#5958 (comment)

array_constructor uses BaseVector::copyRanges, which is somewhat fast for arrays and maps, but very slow for primitive types:

```
FlatVector.h

  void copyRanges(
      const BaseVector* source,
      const folly::Range<const BaseVector::CopyRange*>& ranges) override {
    for (auto& range : ranges) {
      copy(source, range.targetIndex, range.sourceIndex, range.count);
    }
  }
```

FlatVector<T>::copy(source, rows, toSourceRow) is faster.

Switching from copyRanges to copy speeds up array_constructor for primitive types and structs significantly. Yet, this change makes arrays and maps slower.

The slowness is due to ArrayVector and MapVector not having implementation for copy(source, rows, toSourceRow). They rely on BaseVector::copy to translate rows + toSourceRow to ranges. This extra processing causes perf regression.

Hence, we use copy for primitive types and structs of these and copyRanges for everything else.

```
Before:

array_constructor_ARRAY_nullfree#facebookincubator#1                        16.80ms     59.53
array_constructor_ARRAY_nullfree#facebookincubator#2                        27.02ms     37.01
array_constructor_ARRAY_nullfree#facebookincubator#3                        38.03ms     26.30
array_constructor_ARRAY_nullfree##2_null                   52.86ms     18.92
array_constructor_ARRAY_nullfree##2_const                  54.97ms     18.19
array_constructor_ARRAY_nulls#facebookincubator#1                           30.61ms     32.66
array_constructor_ARRAY_nulls#facebookincubator#2                           55.01ms     18.18
array_constructor_ARRAY_nulls#facebookincubator#3                           80.69ms     12.39
array_constructor_ARRAY_nulls##2_null                      69.10ms     14.47
array_constructor_ARRAY_nulls##2_const                    103.85ms      9.63


After:

array_constructor_ARRAY_nullfree#facebookincubator#1                        15.25ms     65.58
array_constructor_ARRAY_nullfree#facebookincubator#2                        25.11ms     39.82
array_constructor_ARRAY_nullfree#facebookincubator#3                        34.59ms     28.91
array_constructor_ARRAY_nullfree##2_null                   53.61ms     18.65
array_constructor_ARRAY_nullfree##2_const                  51.48ms     19.42
array_constructor_ARRAY_nulls#facebookincubator#1                           29.99ms     33.34
array_constructor_ARRAY_nulls#facebookincubator#2                           55.91ms     17.89
array_constructor_ARRAY_nulls#facebookincubator#3                           81.73ms     12.24
array_constructor_ARRAY_nulls##2_null                      66.97ms     14.93
array_constructor_ARRAY_nulls##2_const                     92.96ms     10.76


Before:

array_constructor_INTEGER_nullfree#facebookincubator#1                      19.72ms     50.71
array_constructor_INTEGER_nullfree#facebookincubator#2                      34.51ms     28.97
array_constructor_INTEGER_nullfree#facebookincubator#3                      47.95ms     20.86
array_constructor_INTEGER_nullfree##2_null                 58.68ms     17.04
array_constructor_INTEGER_nullfree##2_const                45.15ms     22.15
array_constructor_INTEGER_nulls#facebookincubator#1                         29.99ms     33.34
array_constructor_INTEGER_nulls#facebookincubator#2                         55.32ms     18.08
array_constructor_INTEGER_nulls#facebookincubator#3                         78.53ms     12.73
array_constructor_INTEGER_nulls##2_null                    72.24ms     13.84
array_constructor_INTEGER_nulls##2_const                   71.13ms     14.06


After:

array_constructor_INTEGER_nullfree#facebookincubator#1                       3.39ms    294.89
array_constructor_INTEGER_nullfree#facebookincubator#2                       7.35ms    136.10
array_constructor_INTEGER_nullfree#facebookincubator#3                      10.78ms     92.74
array_constructor_INTEGER_nullfree##2_null                 11.29ms     88.57
array_constructor_INTEGER_nullfree##2_const                10.14ms     98.65
array_constructor_INTEGER_nulls#facebookincubator#1                          4.49ms    222.53
array_constructor_INTEGER_nulls#facebookincubator#2                          9.78ms    102.29
array_constructor_INTEGER_nulls#facebookincubator#3                         14.69ms     68.08
array_constructor_INTEGER_nulls##2_null                    12.14ms     82.36
array_constructor_INTEGER_nulls##2_const                   12.27ms     81.53

Before:

array_constructor_MAP_nullfree#facebookincubator#1                          17.34ms     57.65
array_constructor_MAP_nullfree#facebookincubator#2                          29.84ms     33.51
array_constructor_MAP_nullfree#facebookincubator#3                          41.51ms     24.09
array_constructor_MAP_nullfree##2_null                     56.57ms     17.68
array_constructor_MAP_nullfree##2_const                    71.68ms     13.95
array_constructor_MAP_nulls#facebookincubator#1                             36.22ms     27.61
array_constructor_MAP_nulls#facebookincubator#2                             68.18ms     14.67
array_constructor_MAP_nulls#facebookincubator#3                             95.12ms     10.51
array_constructor_MAP_nulls##2_null                        86.42ms     11.57
array_constructor_MAP_nulls##2_const                      120.10ms      8.33


After:

array_constructor_MAP_nullfree#facebookincubator#1                          17.05ms     58.66
array_constructor_MAP_nullfree#facebookincubator#2                          28.42ms     35.18
array_constructor_MAP_nullfree#facebookincubator#3                          36.96ms     27.06
array_constructor_MAP_nullfree##2_null                     55.64ms     17.97
array_constructor_MAP_nullfree##2_const                    67.53ms     14.81
array_constructor_MAP_nulls#facebookincubator#1                             32.91ms     30.39
array_constructor_MAP_nulls#facebookincubator#2                             64.50ms     15.50
array_constructor_MAP_nulls#facebookincubator#3                             95.71ms     10.45
array_constructor_MAP_nulls##2_null                        77.22ms     12.95
array_constructor_MAP_nulls##2_const                      114.91ms      8.70

Before:

array_constructor_ROW_nullfree#facebookincubator#1                          33.88ms     29.52
array_constructor_ROW_nullfree#facebookincubator#2                          62.00ms     16.13
array_constructor_ROW_nullfree#facebookincubator#3                          89.54ms     11.17
array_constructor_ROW_nullfree##2_null                     78.46ms     12.75
array_constructor_ROW_nullfree##2_const                    95.53ms     10.47
array_constructor_ROW_nulls#facebookincubator#1                             44.11ms     22.67
array_constructor_ROW_nulls#facebookincubator#2                            115.43ms      8.66
array_constructor_ROW_nulls#facebookincubator#3                            173.61ms      5.76
array_constructor_ROW_nulls##2_null                       130.40ms      7.67
array_constructor_ROW_nulls##2_const                      169.97ms      5.88

After:

array_constructor_ROW_nullfree#facebookincubator#1                           5.55ms    180.15
array_constructor_ROW_nullfree#facebookincubator#2                          12.83ms     77.94
array_constructor_ROW_nullfree#facebookincubator#3                          18.89ms     52.95
array_constructor_ROW_nullfree##2_null                     18.74ms     53.36
array_constructor_ROW_nullfree##2_const                    18.16ms     55.07
array_constructor_ROW_nulls#facebookincubator#1                             11.29ms     88.61
array_constructor_ROW_nulls#facebookincubator#2                             18.57ms     53.86
array_constructor_ROW_nulls#facebookincubator#3                             34.20ms     29.24
array_constructor_ROW_nulls##2_null                        25.05ms     39.92
array_constructor_ROW_nulls##2_const                       25.15ms     39.77
```

Differential Revision: D49272797
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Sep 15, 2023
1 parent c513fd4 commit 334f3dc
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 14 deletions.
81 changes: 67 additions & 14 deletions velox/functions/prestosql/ArrayConstructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,55 @@ class ArrayConstructor : public exec::VectorFunction {
} else {
elementsResult->resize(baseOffset + numArgs * rows.countSelected());

std::vector<BaseVector::CopyRange> ranges;
ranges.reserve(rows.end());
if (shouldCopyRanges(elementsResult->type())) {
std::vector<BaseVector::CopyRange> ranges;
ranges.reserve(rows.end());

vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;
ranges.push_back({row, offset, 1});
offset += numArgs;
});
vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;
ranges.push_back({row, offset, 1});
offset += numArgs;
});

elementsResult->copyRanges(args[0].get(), ranges);
elementsResult->copyRanges(args[0].get(), ranges);

for (int i = 1; i < numArgs; i++) {
for (auto& range : ranges) {
++range.targetIndex;
for (int i = 1; i < numArgs; i++) {
for (auto& range : ranges) {
++range.targetIndex;
}
elementsResult->copyRanges(args[i].get(), ranges);
}
} else {
SelectivityVector targetRows(elementsResult->size(), false);
std::vector<vector_size_t> toSourceRow(elementsResult->size());

vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;

targetRows.setValid(offset, true);
toSourceRow[offset] = row;

offset += numArgs;
});
targetRows.updateBounds();
elementsResult->copy(args[0].get(), targetRows, toSourceRow.data());

for (int i = 1; i < numArgs; i++) {
targetRows.clearAll();
vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
targetRows.setValid(offset + i, true);
toSourceRow[offset + i] = row;
offset += numArgs;
});

targetRows.updateBounds();
elementsResult->copy(args[i].get(), targetRows, toSourceRow.data());
}
elementsResult->copyRanges(args[i].get(), ranges);
}
}
}
Expand All @@ -90,6 +121,28 @@ class ArrayConstructor : public exec::VectorFunction {
.build(),
};
}

private:
// BaseVector::copyRange is faster for arrays and maps and slower for
// primitive types. Check if 'type' is an array or map or contains an array or
// map. If so, return true, otherwise, false.
static bool shouldCopyRanges(const TypePtr& type) {
if (type->isPrimitiveType()) {
return false;
}

if (!type->isRow()) {
return true;
}

const auto& rowType = type->asRow();
for (const auto& child : rowType.children()) {
if (shouldCopyRanges(child)) {
return true;
}
}
return false;
}
};
} // namespace

Expand Down
94 changes: 94 additions & 0 deletions velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/Benchmark.h>
#include <folly/init/Init.h>

#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
#include "velox/functions/lib/LambdaFunctionUtil.h"
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
#include "velox/functions/prestosql/ArrayFunctions.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"

using namespace facebook::velox;
using namespace facebook::velox::exec;
using namespace facebook::velox::functions;

int main(int argc, char** argv) {
folly::init(&argc, &argv);

functions::prestosql::registerArrayFunctions();

ExpressionBenchmarkBuilder benchmarkBuilder;

auto* pool = benchmarkBuilder.pool();
auto& vm = benchmarkBuilder.vectorMaker();

auto createSet =
[&](const TypePtr& type, bool withNulls, const VectorPtr& constantInput) {
VectorFuzzer::Options options;
options.vectorSize = 1'000;
options.nullRatio = withNulls ? 0.2 : 0.0;

VectorFuzzer fuzzer(options, pool);
std::vector<VectorPtr> columns;
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(
BaseVector::createNullConstant(type, options.vectorSize, pool));
columns.push_back(
BaseVector::wrapInConstant(options.vectorSize, 0, constantInput));

auto input = vm.rowVector({"c0", "c1", "c2", "n", "c"}, columns);

benchmarkBuilder
.addBenchmarkSet(
fmt::format(
"array_constructor_{}_{}",
mapTypeKindToName(type->kind()),
withNulls ? "nulls" : "nullfree"),
input)
.addExpression("1", "array_constructor(c0)")
.addExpression("2", "array_constructor(c0, c1)")
.addExpression("3", "array_constructor(c0, c1, c2)")
.addExpression("2_null", "array_constructor(c0, c1, n)")
.addExpression("2_const", "array_constructor(c0, c1, c)");
};

auto constantInteger = BaseVector::createConstant(INTEGER(), 11, 1, pool);
createSet(INTEGER(), true, constantInteger);
createSet(INTEGER(), false, constantInteger);

auto constantRow = vm.rowVector({
BaseVector::createConstant(INTEGER(), 11, 1, pool),
BaseVector::createConstant(DOUBLE(), 1.23, 1, pool),
});
createSet(ROW({INTEGER(), DOUBLE()}), true, constantRow);
createSet(ROW({INTEGER(), DOUBLE()}), false, constantRow);

auto constantArray = vm.arrayVector<int32_t>({{1, 2, 3, 4, 5}});
createSet(ARRAY(INTEGER()), true, constantArray);
createSet(ARRAY(INTEGER()), false, constantArray);

auto constantMap = vm.mapVector<int32_t, float>({{{1, 1.23}, {2, 2.34}}});
createSet(MAP(INTEGER(), REAL()), true, constantMap);
createSet(MAP(INTEGER(), REAL()), false, constantMap);

benchmarkBuilder.registerBenchmarks();

folly::runBenchmarks();
return 0;
}

0 comments on commit 334f3dc

Please sign in to comment.