From de35346d41b970da9e0c1b35a6b2f7a97a673009 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Thu, 12 Oct 2023 15:30:56 -0700 Subject: [PATCH] Create a benchmark for map subscript. (#7026) Summary: ``` ============================================================================ [...]hmarks/ExpressionBenchmarkBuilder.cpp relative time/iter iters/s ============================================================================ map_subscript_MAP<ARRAY<VARCHAR>,INTEGER>##1 71.48ms 13.99 map_subscript_MAP<ARRAY<VARCHAR>,INTEGER>##2 76.58ms 13.06 map_subscript_MAP<ARRAY<VARCHAR>,INTEGER>##3 85.31ms 11.72 map_subscript_MAP<ARRAY<VARCHAR>,INTEGER>##4 121.56ms 8.23 map_subscript_MAP<INTEGER,INTEGER>##1 27.19ms 36.78 map_subscript_MAP<INTEGER,INTEGER>##2 33.10ms 30.21 map_subscript_MAP<INTEGER,INTEGER>##3 33.47ms 29.88 map_subscript_MAP<INTEGER,INTEGER>##4 31.70ms 31.55 map_subscript_MAP<VARCHAR,INTEGER>##1 26.92ms 37.14 map_subscript_MAP<VARCHAR,INTEGER>##2 36.62ms 27.31 map_subscript_MAP<VARCHAR,INTEGER>##3 34.19ms 29.24 map_subscript_MAP<VARCHAR,INTEGER>##4 33.76ms 29.62 ``` Differential Revision: D50237919 --- .../prestosql/benchmarks/CMakeLists.txt | 5 + .../benchmarks/MapSubscriptBenchmark.cpp | 105 ++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 velox/functions/prestosql/benchmarks/MapSubscriptBenchmark.cpp diff --git a/velox/functions/prestosql/benchmarks/CMakeLists.txt b/velox/functions/prestosql/benchmarks/CMakeLists.txt index a671b63f7831a..fb88180a53fb7 100644 --- a/velox/functions/prestosql/benchmarks/CMakeLists.txt +++ b/velox/functions/prestosql/benchmarks/CMakeLists.txt @@ -171,3 +171,8 @@ add_executable(velox_functions_benchmarks_simdjson_function_with_expr JsonExprBenchmark.cpp) target_link_libraries(velox_functions_benchmarks_simdjson_function_with_expr ${BENCHMARK_DEPENDENCIES}) + +add_executable(velox_functions_prestosql_benchmarks_map_subscript + MapSubscriptBenchmark.cpp) +target_link_libraries(velox_functions_prestosql_benchmarks_map_subscript + ${BENCHMARK_DEPENDENCIES}) diff --git a/velox/functions/prestosql/benchmarks/MapSubscriptBenchmark.cpp b/velox/functions/prestosql/benchmarks/MapSubscriptBenchmark.cpp new file
mode 100644
index 0000000000000..b231334e4a70b
--- /dev/null
+++ b/velox/functions/prestosql/benchmarks/MapSubscriptBenchmark.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <folly/Benchmark.h>
+#include <folly/Random.h>
+#include <folly/init/Init.h>
+
+#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
+#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
+#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
+#include "velox/vector/BaseVector.h"
+#include "velox/vector/ComplexVector.h"
+#include "velox/vector/DecodedVector.h"
+
+using namespace facebook::velox;
+using namespace facebook::velox::exec;
+using namespace facebook::velox::functions;
+
+// Benchmarks the subscript(map, key) expression on dictionary-encoded map
+// columns, registering one benchmark set per map type.
+//
+// Each set evaluates four expressions. Expression N pairs map column c(N-1),
+// a dictionary over a base of vectorSize / N fuzzed maps, with key column
+// c(N+3), a dictionary over the keys vector of that same base.
+int main(int argc, char** argv) {
+  folly::Init init(&argc, &argv);
+
+  ExpressionBenchmarkBuilder benchmarkBuilder;
+  facebook::velox::functions::prestosql::registerAllScalarFunctions();
+
+  auto* pool = benchmarkBuilder.pool();
+  auto& vm = benchmarkBuilder.vectorMaker();
+
+  // Adds the benchmark set named "map_subscript_" + mapType->toString().
+  auto createSet = [&](const TypePtr& mapType) {
+    VectorFuzzer::Options options;
+    options.vectorSize = 1'000;
+    options.containerLength = 20;
+    options.containerVariableLength = true;
+
+    VectorFuzzer fuzzer(options, pool);
+    std::vector<VectorPtr> columns;
+
+    // Row count shared by every column; reused below instead of repeating
+    // the literal.
+    const auto numRows = static_cast<vector_size_t>(options.vectorSize);
+
+    // Fuzzes a base of options.vectorSize / ratio maps and wraps it in a
+    // fuzzed dictionary of options.vectorSize rows.
+    auto makeMapVector = [&](auto ratio) {
+      auto baseSize = options.vectorSize / ratio;
+      auto flatBase = fuzzer.fuzzFlat(mapType, baseSize);
+      return fuzzer.fuzzDictionary(flatBase, options.vectorSize);
+    };
+
+    // Map inputs c0..c3, with ratios 1..4.
+    for (int ratio = 1; ratio <= 4; ++ratio) {
+      columns.push_back(makeMapVector(ratio));
+    }
+
+    // Builds the key column for the map column at columns[index - 1]
+    // ('index' is 1-based). Every row references a random position in
+    // [0, options.containerLength) of the base map's keys vector.
+    // NOTE(review): the position is chosen independently of the row, so the
+    // key is not guaranteed to be present in the map at that row -- confirm
+    // this is what the benchmark intends to measure.
+    auto makeKeys = [&](int index) {
+      DecodedVector decoded(*columns[index - 1]);
+      auto* map = decoded.base()->as<MapVector>();
+      auto indices = allocateIndices(numRows, pool);
+      auto* mutableIndices = indices->asMutable<vector_size_t>();
+      for (vector_size_t i = 0; i < numRows; ++i) {
+        mutableIndices[i] = static_cast<vector_size_t>(
+            folly::Random::rand32() % options.containerLength);
+      }
+      return BaseVector::wrapInDictionary(
+          nullptr, indices, numRows, map->mapKeys());
+    };
+
+    // Key inputs c4..c7, one per map column.
+    for (int index = 1; index <= 4; ++index) {
+      columns.push_back(makeKeys(index));
+    }
+
+    // c8 holds row % options.containerLength. None of the expressions below
+    // read it; it is kept so the input row layout is unchanged.
+    auto indicesFlat = vm.flatVector<int32_t>(
+        options.vectorSize,
+        [&](auto row) { return row % options.containerLength; });
+    columns.push_back(indicesFlat);
+
+    benchmarkBuilder
+        .addBenchmarkSet(
+            fmt::format("map_subscript_{}", mapType->toString()),
+            vm.rowVector(columns))
+        .addExpression("1", "subscript(c0, c4)")
+        .addExpression("2", "subscript(c1, c5)")
+        .addExpression("3", "subscript(c2, c6)")
+        .addExpression("4", "subscript(c3, c7)");
+  };
+
+  createSet(MAP(INTEGER(), INTEGER()));
+  createSet(MAP(VARCHAR(), INTEGER()));
+  createSet(MAP(ARRAY(VARCHAR()), INTEGER()));
+
+  benchmarkBuilder.registerBenchmarks();
+
+  folly::runBenchmarks();
+  return 0;
+}