Skip to content

Commit

Permalink
Simple functions type analysis. (#1041)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #1041

Add an analysis to the simple function types to extract information that is needed
in the next diff to assign priorities to signatures.

The recursion that used to convert the types to their string representation is updated
to a general analysis that collects other information as well.

Unit tests added in the diff.

Reviewed By: kevinwilfong

Differential Revision: D34226886

fbshipit-source-id: c69d46415e9793e2c1c445e9496d5e1dd6d25af3
  • Loading branch information
laithsakka authored and facebook-github-bot committed Feb 16, 2022
1 parent c1f9c4b commit 685298d
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 40 deletions.
128 changes: 89 additions & 39 deletions velox/core/SimpleFunctionMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,76 +96,125 @@ struct ValidateVariadicArgs {
static constexpr bool value = isValidArg<0, TArgs...>();
};

// A set of structs used to convert Velox static types to their corresponding
// string representation in the function signature.
// Information collected during TypeAnalysis. The same instance is passed by
// reference to each TypeAnalysis<...>::run() call, so the flags, the count,
// the variables and the type string accumulate across calls.
struct TypeAnalysisResults {
  // Whether a variadic is encountered.
  bool hasVariadic = false;

  // Whether a generic is encountered.
  bool hasGeneric = false;

  // Whether a variadic of generic is encountered.
  // E.g: Variadic<T> or Variadic<Array<T1>>.
  bool hasVariadicOfGeneric = false;

  // The number of types that are neither generic, nor variadic.
  size_t concreteCount = 0;

  // String representation of the type in the FunctionSignatureBuilder.
  std::ostringstream out;

  // Set of generic variables used in the type.
  std::set<std::string> variables;

  // Returns a copy of the type string written to `out` so far. Marked const
  // because it only reads state, which lets callers that hold a const
  // reference to the results query the string.
  std::string typeAsString() const {
    return out.str();
  }

  // Empties the type string only. Flags, concreteCount and variables are left
  // untouched so they keep accumulating over subsequent analyses.
  void resetTypeString() {
    out.str(std::string());
  }
};

// A set of structs used to perform analysis on a static type to
// collect information needed for signature construction.
template <typename T>
struct TypeStringBuilder {
void append(std::ostringstream& out, std::set<std::string>& /*unused*/) {
out << boost::algorithm::to_lower_copy(std::string(CppToType<T>::name));
struct TypeAnalysis {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << boost::algorithm::to_lower_copy(
std::string(CppToType<T>::name));
}
};

template <typename T>
struct TypeStringBuilder<Generic<T>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
struct TypeAnalysis<Generic<T>> {
void run(TypeAnalysisResults& results) {
if constexpr (std::is_same<T, AnyType>::value) {
out << "any";
results.out << "any";
} else {
auto variableType = fmt::format("__user_T{}", T::getId());
out << variableType;
variables.insert(variableType);
results.out << variableType;
results.variables.insert(variableType);
}
results.hasGeneric = true;
}
};

template <typename K, typename V>
struct TypeStringBuilder<Map<K, V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "map(";
TypeStringBuilder<K>().append(out, variables);
out << ",";
TypeStringBuilder<V>().append(out, variables);
out << ")";
struct TypeAnalysis<Map<K, V>> {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "map(";
TypeAnalysis<K>().run(results);
results.out << ",";
TypeAnalysis<V>().run(results);
results.out << ")";
}
};

template <typename V>
struct TypeStringBuilder<Variadic<V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
TypeStringBuilder<V>().append(out, variables);
struct TypeAnalysis<Variadic<V>> {
void run(TypeAnalysisResults& results) {
// We need to split, pass a clean results then merge results to correctly
// compute `hasVariadicOfGeneric`.
TypeAnalysisResults tmp;
TypeAnalysis<V>().run(tmp);

// Combine the child results.
results.hasVariadic = true;
results.hasGeneric = results.hasGeneric || tmp.hasGeneric;
results.hasVariadicOfGeneric =
tmp.hasGeneric || results.hasVariadicOfGeneric;

results.concreteCount += tmp.concreteCount;
results.variables.insert(tmp.variables.begin(), tmp.variables.end());
results.out << tmp.typeAsString();
}
};

template <typename V>
struct TypeStringBuilder<Array<V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "array(";
TypeStringBuilder<V>().append(out, variables);
out << ")";
struct TypeAnalysis<Array<V>> {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "array(";
TypeAnalysis<V>().run(results);
results.out << ")";
}
};

template <typename... T>
struct TypeStringBuilder<Row<T...>> {
struct TypeAnalysis<Row<T...>> {
using child_types = std::tuple<T...>;

template <size_t N>
using child_type_at = typename std::tuple_element<N, child_types>::type;

void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "row(";
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "row(";
// This expression applies the lambda for each row child type.
bool first = true;
(
[&]() {
if (!first) {
out << ", ";
results.out << ", ";
}
first = false;
TypeStringBuilder<T>().append(out, variables);
TypeAnalysis<T>().run(results);
}(),
...);
out << ")";
results.out << ")";
}
};

Expand Down Expand Up @@ -278,23 +327,24 @@ class SimpleFunctionMetadata : public ISimpleFunctionMetadata {
~SimpleFunctionMetadata() override = default;

std::shared_ptr<exec::FunctionSignature> signature() const final {
std::set<std::string> variables;
auto builder = exec::FunctionSignatureBuilder();

std::ostringstream out;
TypeStringBuilder<return_type>().append(out, variables);
builder.returnType(out.str());
TypeAnalysisResults results;
TypeAnalysis<return_type>().run(results);
builder.returnType(results.typeAsString());

// This expression applies the lambda for each input arg type.
(
[&]() {
std::ostringstream outLocal;
TypeStringBuilder<Args>().append(outLocal, variables);
builder.argumentType(outLocal.str());
// Clear string representation but keep other collected information to
// accumulate.
results.resetTypeString();
TypeAnalysis<Args>().run(results);
builder.argumentType(results.typeAsString());
}(),
...);

for (const auto& variable : variables) {
for (const auto& variable : results.variables) {
builder.typeVariable(variable);
}

Expand Down
3 changes: 2 additions & 1 deletion velox/core/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_executable(velox_core_test TestMap.cpp TestMetafunctions.cpp TestString.cpp)
add_executable(velox_core_test TestMap.cpp TestMetafunctions.cpp TestString.cpp
TestTypeAnalysis.cpp)

add_test(velox_core_test velox_core_test)

Expand Down
178 changes: 178 additions & 0 deletions velox/core/tests/TestTypeAnalysis.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#include "velox/core/SimpleFunctionMetadata.h"

// Test for simple function type analysis.
namespace facebook::velox::core {
namespace {
class TypeAnalysisTest : public testing::Test {
protected:
template <typename... Args>
void testHasGeneric(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasGeneric);
}

template <typename... Args>
void testHasVariadic(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasVariadic);
}

template <typename... Args>
void testHasVariadicOfGeneric(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasVariadicOfGeneric);
}

template <typename... Args>
void testCountConcrete(size_t expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.concreteCount);
}

template <typename... Args>
void testStringType(const std::vector<std::string>& expected) {
TypeAnalysisResults results;
std::vector<std::string> types;

(
[&]() {
// Clear string representation but keep other collected information to
// accumulate.
results.resetTypeString();
TypeAnalysis<Args>().run(results);
types.push_back(results.typeAsString());
}(),
...);
ASSERT_EQ(expected, types);
}

template <typename... Args>
void testVariables(const std::set<std::string>& expected) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expected, results.variables);
}
};

// Checks the `hasGeneric` flag collected by TypeAnalysis.
TEST_F(TypeAnalysisTest, hasGeneric) {
// No Generic appears anywhere in these type lists, so the flag stays false.
testHasGeneric<int32_t>(false);
testHasGeneric<int32_t, int32_t>(false);
testHasGeneric<Variadic<int32_t>>(false);
testHasGeneric<Map<Array<int32_t>, Array<int32_t>>>(false);

// A Generic (with or without a named variable) sets the flag whether it is
// top-level, nested in Map/Array/Variadic, or a sibling of a concrete type.
testHasGeneric<Map<Array<Generic<>>, Array<int32_t>>>(true);
testHasGeneric<Map<Array<Generic<T1>>, Array<int32_t>>>(true);
testHasGeneric<Map<Array<int32_t>, Generic<>>>(true);
testHasGeneric<Variadic<Generic<>>>(true);
testHasGeneric<Generic<>>(true);
testHasGeneric<int32_t, Generic<>>(true);
testHasGeneric<Generic<>, int32_t>(true);
}

// Checks the `hasVariadic` flag collected by TypeAnalysis.
TEST_F(TypeAnalysisTest, hasVariadic) {
// No Variadic in the type list: the flag stays false, even when a Generic is
// present.
testHasVariadic<int32_t>(false);
testHasVariadic<Map<Array<int32_t>, Array<int32_t>>>(false);
testHasVariadic<Map<Array<int32_t>, Generic<>>>(false);
testHasVariadic<int32_t, Array<int32_t>>(false);

// A Variadic anywhere in the type list sets the flag, regardless of its
// position or of what it wraps.
testHasVariadic<Variadic<int32_t>>(true);
testHasVariadic<Variadic<Generic<>>>(true);
testHasVariadic<Variadic<int64_t>, Array<int32_t>>(true);
testHasVariadic<int32_t, Variadic<Array<int32_t>>>(true);
}

// Checks the `hasVariadicOfGeneric` flag collected by TypeAnalysis.
TEST_F(TypeAnalysisTest, hasVariadicOfGeneric) {
// The flag must stay false when there is no Variadic, when the Variadic wraps
// only concrete types, and when a Generic is merely a sibling of a Variadic
// rather than nested inside it (the last two cases).
testHasVariadicOfGeneric<int32_t>(false);
testHasVariadicOfGeneric<Map<Array<int32_t>, Array<int32_t>>>(false);
testHasVariadicOfGeneric<Map<Array<int32_t>, Generic<>>>(false);
testHasVariadicOfGeneric<int32_t, Array<int32_t>>(false);
testHasVariadicOfGeneric<Variadic<int32_t>>(false);
testHasVariadicOfGeneric<Variadic<int64_t>, Array<int32_t>>(false);
testHasVariadicOfGeneric<int32_t, Variadic<Array<int32_t>>>(false);
testHasVariadicOfGeneric<Variadic<int32_t>, Generic<>>(false);
testHasVariadicOfGeneric<Generic<>, Variadic<int32_t>>(false);

// The flag is set when a Generic appears inside the Variadic, either directly
// or nested within Array/Map.
testHasVariadicOfGeneric<Variadic<Generic<>>>(true);
testHasVariadicOfGeneric<Variadic<Generic<>>, int32_t>(true);
testHasVariadicOfGeneric<int32_t, Variadic<Array<Generic<>>>>(true);
testHasVariadicOfGeneric<int32_t, Variadic<Map<int64_t, Array<Generic<T1>>>>>(
true);
}

// Checks `concreteCount`: every non-generic, non-variadic type contributes
// one, including each level of a nested Map/Array.
TEST_F(TypeAnalysisTest, countConcrete) {
  // Flat lists of primitive types: one per type.
  testCountConcrete<>(0);
  testCountConcrete<int32_t>(1);
  testCountConcrete<int32_t, int32_t>(2);
  testCountConcrete<int32_t, int32_t, double>(3);

  // Generic and Variadic themselves contribute nothing; only the concrete
  // types wrapped by a Variadic are counted.
  testCountConcrete<Generic<>>(0);
  testCountConcrete<Generic<T1>>(0);
  testCountConcrete<Variadic<Generic<>>>(0);
  testCountConcrete<Variadic<int32_t>>(1);
  testCountConcrete<Variadic<Array<Generic<>>>>(1);

  // Nested types: the container and each concrete child are counted.
  testCountConcrete<Map<Array<int32_t>, Array<int32_t>>>(5);
  testCountConcrete<Map<Array<int32_t>, Generic<>>>(3);
  testCountConcrete<int32_t, Array<int32_t>>(3);
  testCountConcrete<Variadic<int64_t>, Array<int32_t>>(3);
  testCountConcrete<int32_t, Variadic<Array<int32_t>>>(3);
  testCountConcrete<Variadic<int32_t>, Generic<>>(1);
  testCountConcrete<Generic<>, Variadic<int32_t>>(1);

  // Variadic of generic mixed with concrete siblings.
  testCountConcrete<Variadic<Generic<>>, int32_t>(1);
  testCountConcrete<int32_t, Variadic<Array<Generic<>>>>(2);
}

// Checks the string representation produced for each analyzed type.
TEST_F(TypeAnalysisTest, testStringType) {
// Primitive types map to their lower-cased type names.
testStringType<int32_t>({"integer"});
testStringType<int64_t>({"bigint"});
testStringType<double>({"double"});
testStringType<float>({"real"});
testStringType<Array<int32_t>>({"array(integer)"});
// Generic<> renders as "any"; a named generic renders as its variable name.
testStringType<Generic<>>({"any"});
testStringType<Generic<T1>>({"__user_T1"});
testStringType<Map<Generic<>, int32_t>>({"map(any,integer)"});
// Variadic adds nothing to the string: only the wrapped type is printed.
testStringType<Variadic<int32_t>>({"integer"});

// With several types, each one gets its own string, in order.
testStringType<int32_t, int64_t, Map<Array<int32_t>, Generic<T2>>>({
"integer",
"bigint",
"map(array(integer),__user_T2)",
});
}

// Checks the set of generic variable names collected by the analysis.
TEST_F(TypeAnalysisTest, testVariables) {
  // Concrete types and the unnamed Generic<> introduce no variables.
  testVariables<int32_t>({});
  testVariables<Array<int32_t>>({});
  testVariables<Generic<>>({});
  testVariables<Map<Generic<>, int32_t>>({});
  testVariables<Variadic<int32_t>>({});

  // Named generics are recorded under their "__user_T<id>" name.
  testVariables<Generic<T1>>({"__user_T1"});
  testVariables<int32_t, Generic<T5>, Map<Array<int32_t>, Generic<T2>>>(
      {"__user_T2", "__user_T5"});

  // Variables found inside a Variadic are analyzed into a temporary result
  // and must be merged back into the caller's results.
  testVariables<Variadic<Generic<T1>>>({"__user_T1"});
  testVariables<int32_t, Variadic<Array<Generic<T2>>>>({"__user_T2"});

  // The same variable used more than once is recorded only once.
  testVariables<Generic<T1>, Array<Generic<T1>>>({"__user_T1"});
}

} // namespace
} // namespace facebook::velox::core

0 comments on commit 685298d

Please sign in to comment.