Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a benchmark for map subscript. #7026

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions velox/functions/prestosql/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,8 @@ add_executable(velox_functions_benchmarks_simdjson_function_with_expr
JsonExprBenchmark.cpp)
target_link_libraries(velox_functions_benchmarks_simdjson_function_with_expr
${BENCHMARK_DEPENDENCIES})

# Benchmark executable for the map subscript function. It is built from
# MapSubscriptBenchmark.cpp and linked against the same
# ${BENCHMARK_DEPENDENCIES} list used by the other benchmark targets in this
# file.
add_executable(velox_functions_prestosql_benchmarks_map_subscript
MapSubscriptBenchmark.cpp)
target_link_libraries(velox_functions_prestosql_benchmarks_map_subscript
${BENCHMARK_DEPENDENCIES})
105 changes: 105 additions & 0 deletions velox/functions/prestosql/benchmarks/MapSubscriptBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/Benchmark.h>
#include <folly/init/Init.h>
#include <cstdint>

#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/vector/BaseVector.h"
#include "velox/vector/ComplexVector.h"
#include "velox/vector/DecodedVector.h"

using namespace facebook::velox;
using namespace facebook::velox::exec;
using namespace facebook::velox::functions;

/// Benchmarks `subscript(map, key)` over dictionary-encoded map columns.
///
/// For every map type under test a benchmark set is registered whose input
/// row vector has the following layout:
///   c0..c3 - dictionary-wrapped map vectors; column cN wraps a flat base of
///            `vectorSize / (N + 1)` fuzzed maps.
///   c4..c7 - lookup keys for c0..c3 respectively, expressed as a dictionary
///            over the key vector of the corresponding base map.
///   c8     - a flat BIGINT column holding `row % containerLength`.
///
/// @param argc Argument count, forwarded to folly::Init.
/// @param argv Argument values, forwarded to folly::Init.
/// @return 0 once all benchmarks have run.
int main(int argc, char** argv) {
  folly::Init init(&argc, &argv);

  ExpressionBenchmarkBuilder benchmarkBuilder;
  facebook::velox::functions::prestosql::registerAllScalarFunctions();

  auto* pool = benchmarkBuilder.pool();
  auto& vm = benchmarkBuilder.vectorMaker();

  auto createSet = [&](const TypePtr& mapType) {
    VectorFuzzer::Options options;
    options.vectorSize = 1'000;
    options.containerLength = 20;
    // This is an on/off switch, not a length, so spell it as a boolean.
    // The previous value of 20 converted to `true`, so behavior is unchanged.
    options.containerVariableLength = true;

    VectorFuzzer fuzzer(options, pool);
    std::vector<VectorPtr> columns;

    // Ratio = elements in the resulting vector / elements in the flat base.
    auto makeMapVector = [&](auto ratio) {
      const auto baseSize = options.vectorSize / ratio;
      auto flatBase = fuzzer.fuzzFlat(mapType, baseSize);
      return fuzzer.fuzzDictionary(flatBase, options.vectorSize);
    };

    // Fuzz input vectors.
    columns.push_back(makeMapVector(1));
    columns.push_back(makeMapVector(2));
    columns.push_back(makeMapVector(3));
    columns.push_back(makeMapVector(4));

    // Builds a key column for the map stored at columns[index - 1] (index is
    // 1-based). For each row the key is drawn from that row's own map, so the
    // lookup is a hit whenever the map is non-empty. Picking among the first
    // few entries of the shared key vector instead would almost always miss,
    // because those keys belong to the first map(s) only.
    auto makeKeys = [&](int index) {
      DecodedVector decoded(*columns[index - 1]);
      auto* map = decoded.base()->as<MapVector>();
      const auto numRows = static_cast<vector_size_t>(options.vectorSize);
      auto indices = allocateIndices(numRows, pool);
      auto* mutableIndices = indices->asMutable<vector_size_t>();
      for (vector_size_t row = 0; row < numRows; ++row) {
        // Null or empty maps have no key to pick; fall back to position 0.
        vector_size_t keyIndex = 0;
        if (!decoded.isNullAt(row)) {
          const auto baseRow = decoded.index(row);
          const auto numKeys = map->sizeAt(baseRow);
          if (numKeys > 0) {
            keyIndex = map->offsetAt(baseRow) +
                static_cast<vector_size_t>(folly::Random::rand32(
                    static_cast<uint32_t>(numKeys)));
          }
        }
        // The position inside the key vector selects the lookup key.
        mutableIndices[row] = keyIndex;
      }
      return BaseVector::wrapInDictionary(
          nullptr, indices, numRows, map->mapKeys());
    };

    columns.push_back(makeKeys(1));
    columns.push_back(makeKeys(2));
    columns.push_back(makeKeys(3));
    columns.push_back(makeKeys(4));

    // NOTE: c8 is part of the input row but none of the expressions below
    // reference it.
    auto indicesFlat = vm.flatVector<int64_t>(
        options.vectorSize,
        [&](auto row) { return row % options.containerLength; });
    columns.push_back(indicesFlat);

    benchmarkBuilder
        .addBenchmarkSet(
            fmt::format("map_subscript_{}", mapType->toString()),
            vm.rowVector(columns))
        .addExpression("1", "subscript(c0, c4)")
        .addExpression("2", "subscript(c1, c5)")
        .addExpression("3", "subscript(c2, c6)")
        .addExpression("4", "subscript(c3, c7)");
  };

  createSet(MAP(INTEGER(), INTEGER()));
  createSet(MAP(VARCHAR(), INTEGER()));
  createSet(MAP(ARRAY(VARCHAR()), INTEGER()));

  benchmarkBuilder.registerBenchmarks();

  folly::runBenchmarks();
  return 0;
}