-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create a benchmark for map subscript. #7026
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include <folly/Benchmark.h> | ||
#include <folly/init/Init.h> | ||
#include <cstdint> | ||
|
||
#include "velox/benchmarks/ExpressionBenchmarkBuilder.h" | ||
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h" | ||
#include "velox/functions/prestosql/registration/RegistrationFunctions.h" | ||
#include "velox/vector/BaseVector.h" | ||
#include "velox/vector/ComplexVector.h" | ||
#include "velox/vector/DecodedVector.h" | ||
|
||
using namespace facebook::velox; | ||
using namespace facebook::velox::exec; | ||
using namespace facebook::velox::functions; | ||
|
||
int main(int argc, char** argv) { | ||
folly::Init init(&argc, &argv); | ||
|
||
ExpressionBenchmarkBuilder benchmarkBuilder; | ||
facebook::velox::functions::prestosql::registerAllScalarFunctions(); | ||
|
||
auto* pool = benchmarkBuilder.pool(); | ||
auto& vm = benchmarkBuilder.vectorMaker(); | ||
|
||
auto createSet = [&](const TypePtr& mapType) { | ||
VectorFuzzer::Options options; | ||
options.vectorSize = 1'000; | ||
options.containerLength = 20; | ||
options.containerVariableLength = 20; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isn't containerVariableLength a bool? this is probably a typo |
||
|
||
VectorFuzzer fuzzer(options, pool); | ||
std::vector<VectorPtr> columns; | ||
|
||
// Ratio = elements vector/ elements in base. | ||
auto makeMapVector = [&](auto ratio) { | ||
auto baseSize = options.vectorSize / ratio; | ||
auto flatBase = fuzzer.fuzzFlat(mapType, baseSize); | ||
auto dictionary = fuzzer.fuzzDictionary(flatBase, options.vectorSize); | ||
return dictionary; | ||
}; | ||
|
||
// Fuzz input vectors. | ||
columns.push_back(makeMapVector(1)); | ||
columns.push_back(makeMapVector(2)); | ||
columns.push_back(makeMapVector(3)); | ||
columns.push_back(makeMapVector(4)); | ||
|
||
// Fuzz valid keys for map at columns[index]. | ||
auto makeKeys = [&](int index) { | ||
DecodedVector decoded(*columns[index - 1]); | ||
auto* map = decoded.base()->as<MapVector>(); | ||
auto indices = allocateIndices(1000, pool); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. everywhere you use 1000 in this lambda, it's because that's the value of options.vectorSize right? could we just use that variable here instead? |
||
auto* mutableIndices = indices->asMutable<vector_size_t>(); | ||
for (int i = 0; i < 1000; i++) { | ||
int keyIndex = folly::Random::rand32() % 20; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will mostly produce misses right? Is that intended? The first 20 elements in mapKeys are likely to be in the first few maps (maybe just the first one). And the keys are not very likely to occur in later maps because of the range the random map keys are pulled from. I may be misreading/misunderstanding the code. |
||
// We use the keyIndex as the key. | ||
mutableIndices[i] = keyIndex; | ||
} | ||
return BaseVector::wrapInDictionary( | ||
nullptr, indices, 1000, map->mapKeys()); | ||
}; | ||
|
||
columns.push_back(makeKeys(1)); | ||
columns.push_back(makeKeys(2)); | ||
columns.push_back(makeKeys(3)); | ||
columns.push_back(makeKeys(4)); | ||
|
||
auto indicesFlat = vm.flatVector<int64_t>( | ||
options.vectorSize, | ||
[&](auto row) { return row % options.containerLength; }); | ||
columns.push_back(indicesFlat); | ||
benchmarkBuilder | ||
.addBenchmarkSet( | ||
fmt::format("map_subscript_{}", mapType->toString()), | ||
vm.rowVector(columns)) | ||
.addExpression("1", "subscript(c0, c4)") | ||
.addExpression("2", "subscript(c1, c5)") | ||
.addExpression("3", "subscript(c2, c6)") | ||
.addExpression("4", "subscript(c3, c7)"); | ||
}; | ||
|
||
createSet(MAP(INTEGER(), INTEGER())); | ||
createSet(MAP(VARCHAR(), INTEGER())); | ||
createSet(MAP(ARRAY(VARCHAR()), INTEGER())); | ||
|
||
benchmarkBuilder.registerBenchmarks(); | ||
|
||
folly::runBenchmarks(); | ||
return 0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add some variability to this? IMO this is the most important parameter in this case.