Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple functions type analysis. #1041

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 89 additions & 39 deletions velox/core/SimpleFunctionMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,76 +96,125 @@ struct ValidateVariadicArgs {
static constexpr bool value = isValidArg<0, TArgs...>();
};

// A set of structs used to convert Velox static types to their corresponding
// string representation in the function signature.
// Information collected during TypeAnalysis.
// Mutable accumulator filled in by TypeAnalysis<...>::run(). The flags, the
// concrete count and the variable set are independent of the string buffer:
// resetTypeString() clears only the latter.
struct TypeAnalysisResults {
  // Whether a variadic is encountered.
  bool hasVariadic = false;

  // Whether a generic is encountered.
  bool hasGeneric = false;

  // Whether a variadic of generic is encountered.
  // E.g: Variadic<T> or Variadic<Array<T1>>.
  bool hasVariadicOfGeneric = false;

  // The number of types that are neither generic, nor variadic.
  size_t concreteCount = 0;

  // String representation of the type in the FunctionSignatureBuilder.
  std::ostringstream out;

  // Set of generic variables used in the type.
  std::set<std::string> variables;

  // Returns a copy of everything written to `out` since construction or since
  // the last resetTypeString(). Marked const because it only reads the
  // buffer, which lets callers query results through a const reference.
  std::string typeAsString() const {
    return out.str();
  }

  // Empties the string buffer only. Flags, concreteCount and variables are
  // left untouched so they keep accumulating across several analyzed types.
  void resetTypeString() {
    out.str(std::string());
  }
};

// A set of structs used to perform analysis on a static type to
// collect information needed for signature construction.
// Primary template, used for any T that does not match one of the
// specializations. run() counts T as one concrete type and appends the
// lower-cased CppToType<T>::name to the string output.
template <typename T>
struct TypeStringBuilder {
void append(std::ostringstream& out, std::set<std::string>& /*unused*/) {
out << boost::algorithm::to_lower_copy(std::string(CppToType<T>::name));
struct TypeAnalysis {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << boost::algorithm::to_lower_copy(
std::string(CppToType<T>::name));
}
};

// Specialization for Generic<T>. When T is AnyType the type is written as
// "any" and no variable is recorded; otherwise it is written as
// "__user_T<id>" (id taken from T::getId()) and that name is added to the
// variable set. run() always sets hasGeneric and never changes concreteCount.
template <typename T>
struct TypeStringBuilder<Generic<T>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
struct TypeAnalysis<Generic<T>> {
void run(TypeAnalysisResults& results) {
if constexpr (std::is_same<T, AnyType>::value) {
out << "any";
results.out << "any";
} else {
auto variableType = fmt::format("__user_T{}", T::getId());
out << variableType;
variables.insert(variableType);
results.out << variableType;
results.variables.insert(variableType);
}
results.hasGeneric = true;
}
};

// Specialization for Map<K, V>. run() counts the map itself as one concrete
// type, then analyzes K followed by V into the same results object, writing
// "map(<K>,<V>)" with a bare comma (no space) between key and value.
template <typename K, typename V>
struct TypeStringBuilder<Map<K, V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "map(";
TypeStringBuilder<K>().append(out, variables);
out << ",";
TypeStringBuilder<V>().append(out, variables);
out << ")";
struct TypeAnalysis<Map<K, V>> {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "map(";
TypeAnalysis<K>().run(results);
results.out << ",";
TypeAnalysis<V>().run(results);
results.out << ")";
}
};

// Specialization for Variadic<V>. run() analyzes V into a fresh scratch
// TypeAnalysisResults and then folds that into `results`. The Variadic
// wrapper itself adds nothing to the string output or to concreteCount; only
// V's contribution is merged.
template <typename V>
struct TypeStringBuilder<Variadic<V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
TypeStringBuilder<V>().append(out, variables);
struct TypeAnalysis<Variadic<V>> {
void run(TypeAnalysisResults& results) {
// We need to split, pass a clean results then merge results to correctly
// compute `hasVariadicOfGeneric`.
TypeAnalysisResults tmp;
TypeAnalysis<V>().run(tmp);

// Combine the child results.
results.hasVariadic = true;
results.hasGeneric = results.hasGeneric || tmp.hasGeneric;
// Only a generic found inside V (tmp) can turn this flag on; a generic seen
// elsewhere in `results` does not.
results.hasVariadicOfGeneric =
tmp.hasGeneric || results.hasVariadicOfGeneric;

results.concreteCount += tmp.concreteCount;
results.variables.insert(tmp.variables.begin(), tmp.variables.end());
results.out << tmp.typeAsString();
}
};

// Specialization for Array<V>. run() counts the array itself as one concrete
// type and writes "array(<V>)", analyzing V into the same results object.
template <typename V>
struct TypeStringBuilder<Array<V>> {
void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "array(";
TypeStringBuilder<V>().append(out, variables);
out << ")";
struct TypeAnalysis<Array<V>> {
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "array(";
TypeAnalysis<V>().run(results);
results.out << ")";
}
};

// Specialization for Row<T...>. run() counts the row itself as one concrete
// type and writes "row(<T0>, <T1>, ...)", analyzing each child type in
// declaration order into the same results object.
// NOTE(review): children are separated by ", " (comma + space) here, while
// the Map specialization uses "," with no space -- confirm this difference is
// intended.
template <typename... T>
struct TypeStringBuilder<Row<T...>> {
struct TypeAnalysis<Row<T...>> {
using child_types = std::tuple<T...>;

template <size_t N>
using child_type_at = typename std::tuple_element<N, child_types>::type;

void append(std::ostringstream& out, std::set<std::string>& variables) {
out << "row(";
void run(TypeAnalysisResults& results) {
results.concreteCount++;
results.out << "row(";
// This expression applies the lambda for each row child type.
// `first` suppresses the separator before the first child only.
bool first = true;
(
[&]() {
if (!first) {
out << ", ";
results.out << ", ";
}
first = false;
TypeStringBuilder<T>().append(out, variables);
TypeAnalysis<T>().run(results);
}(),
...);
out << ")";
results.out << ")";
}
};

Expand Down Expand Up @@ -278,23 +327,24 @@ class SimpleFunctionMetadata : public ISimpleFunctionMetadata {
~SimpleFunctionMetadata() override = default;

std::shared_ptr<exec::FunctionSignature> signature() const final {
std::set<std::string> variables;
auto builder = exec::FunctionSignatureBuilder();

std::ostringstream out;
TypeStringBuilder<return_type>().append(out, variables);
builder.returnType(out.str());
TypeAnalysisResults results;
TypeAnalysis<return_type>().run(results);
builder.returnType(results.typeAsString());

// This expression applies the lambda for each input arg type.
(
[&]() {
std::ostringstream outLocal;
TypeStringBuilder<Args>().append(outLocal, variables);
builder.argumentType(outLocal.str());
// Clear string representation but keep other collected information to
// accumulate.
results.resetTypeString();
TypeAnalysis<Args>().run(results);
builder.argumentType(results.typeAsString());
}(),
...);

for (const auto& variable : variables) {
for (const auto& variable : results.variables) {
builder.typeVariable(variable);
}

Expand Down
3 changes: 2 additions & 1 deletion velox/core/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

add_executable(velox_core_test TestMap.cpp TestMetafunctions.cpp TestString.cpp)
add_executable(velox_core_test TestMap.cpp TestMetafunctions.cpp TestString.cpp
TestTypeAnalysis.cpp)

add_test(velox_core_test velox_core_test)

Expand Down
178 changes: 178 additions & 0 deletions velox/core/tests/TestTypeAnalysis.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#include "velox/core/SimpleFunctionMetadata.h"

// Test for simple function type analysis.
namespace facebook::velox::core {
namespace {
class TypeAnalysisTest : public testing::Test {
protected:
template <typename... Args>
void testHasGeneric(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasGeneric);
}

template <typename... Args>
void testHasVariadic(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasVariadic);
}

template <typename... Args>
void testHasVariadicOfGeneric(bool expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.hasVariadicOfGeneric);
}

template <typename... Args>
void testCountConcrete(size_t expecetd) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expecetd, results.concreteCount);
}

template <typename... Args>
void testStringType(const std::vector<std::string>& expected) {
TypeAnalysisResults results;
std::vector<std::string> types;

(
[&]() {
// Clear string representation but keep other collected information to
// accumulate.
results.resetTypeString();
TypeAnalysis<Args>().run(results);
types.push_back(results.typeAsString());
}(),
...);
ASSERT_EQ(expected, types);
}

template <typename... Args>
void testVariables(const std::set<std::string>& expected) {
TypeAnalysisResults results;
(TypeAnalysis<Args>().run(results), ...);
ASSERT_EQ(expected, results.variables);
}
};

// hasGeneric must be set whenever a Generic<...> appears anywhere in the
// analyzed types, and stay clear otherwise.
TEST_F(TypeAnalysisTest, hasGeneric) {
  // Only concrete types, at top level, nested, or wrapped in Variadic.
  testHasGeneric<int32_t>(false);
  testHasGeneric<int32_t, int32_t>(false);
  testHasGeneric<Map<Array<int32_t>, Array<int32_t>>>(false);
  testHasGeneric<Variadic<int32_t>>(false);

  // Generic at top level, in either argument position.
  testHasGeneric<Generic<>>(true);
  testHasGeneric<int32_t, Generic<>>(true);
  testHasGeneric<Generic<>, int32_t>(true);

  // Generic wrapped in Variadic.
  testHasGeneric<Variadic<Generic<>>>(true);

  // Generic nested inside containers.
  testHasGeneric<Map<Array<int32_t>, Generic<>>>(true);
  testHasGeneric<Map<Array<Generic<>>, Array<int32_t>>>(true);
  testHasGeneric<Map<Array<Generic<T1>>, Array<int32_t>>>(true);
}

// hasVariadic must be set whenever any analyzed type is a Variadic<...>,
// regardless of what it wraps or where it sits in the argument list.
TEST_F(TypeAnalysisTest, hasVariadic) {
  // No Variadic among the analyzed types.
  testHasVariadic<int32_t>(false);
  testHasVariadic<int32_t, Array<int32_t>>(false);
  testHasVariadic<Map<Array<int32_t>, Array<int32_t>>>(false);
  testHasVariadic<Map<Array<int32_t>, Generic<>>>(false);

  // Variadic as the only type.
  testHasVariadic<Variadic<int32_t>>(true);
  testHasVariadic<Variadic<Generic<>>>(true);

  // Variadic mixed with other types, in either position.
  testHasVariadic<Variadic<int64_t>, Array<int32_t>>(true);
  testHasVariadic<int32_t, Variadic<Array<int32_t>>>(true);
}

// hasVariadicOfGeneric must be set only when a Generic<...> appears inside a
// Variadic<...>; a Variadic and a Generic that are merely siblings must not
// trigger it.
TEST_F(TypeAnalysisTest, hasVariadicOfGeneric) {
  // Neither Variadic nor Generic inside a Variadic.
  testHasVariadicOfGeneric<int32_t>(false);
  testHasVariadicOfGeneric<int32_t, Array<int32_t>>(false);
  testHasVariadicOfGeneric<Map<Array<int32_t>, Array<int32_t>>>(false);
  testHasVariadicOfGeneric<Map<Array<int32_t>, Generic<>>>(false);

  // Variadic of concrete types only.
  testHasVariadicOfGeneric<Variadic<int32_t>>(false);
  testHasVariadicOfGeneric<Variadic<int64_t>, Array<int32_t>>(false);
  testHasVariadicOfGeneric<int32_t, Variadic<Array<int32_t>>>(false);

  // Variadic and Generic side by side, in both orders.
  testHasVariadicOfGeneric<Variadic<int32_t>, Generic<>>(false);
  testHasVariadicOfGeneric<Generic<>, Variadic<int32_t>>(false);

  // Generic directly or transitively inside a Variadic.
  testHasVariadicOfGeneric<Variadic<Generic<>>>(true);
  testHasVariadicOfGeneric<Variadic<Generic<>>, int32_t>(true);
  testHasVariadicOfGeneric<int32_t, Variadic<Array<Generic<>>>>(true);
  testHasVariadicOfGeneric<int32_t, Variadic<Map<int64_t, Array<Generic<T1>>>>>(
      true);
}

// concreteCount counts every analyzed type node that is neither a Generic nor
// a Variadic wrapper: each leaf type and each Array/Map container contributes
// one, recursively.
TEST_F(TypeAnalysisTest, countConcrete) {
  // Flat argument lists: one per concrete argument.
  testCountConcrete<>(0);
  testCountConcrete<int32_t>(1);
  testCountConcrete<int32_t, int32_t>(2);
  testCountConcrete<int32_t, int32_t, double>(3);

  // Generic and Variadic nodes themselves contribute nothing; only what a
  // Variadic wraps is counted.
  testCountConcrete<Generic<>>(0);
  testCountConcrete<Generic<T1>>(0);
  testCountConcrete<Variadic<Generic<>>>(0);
  testCountConcrete<Variadic<int32_t>>(1);
  testCountConcrete<Variadic<Array<Generic<>>>>(1);

  // Nested containers: map (1) + two arrays (2) + two integers (2) = 5.
  testCountConcrete<Map<Array<int32_t>, Array<int32_t>>>(5);
  // map (1) + array (1) + integer (1); the Generic value adds nothing.
  testCountConcrete<Map<Array<int32_t>, Generic<>>>(3);

  // Counts accumulate across several arguments.
  testCountConcrete<int32_t, Array<int32_t>>(3);
  testCountConcrete<Variadic<int64_t>, Array<int32_t>>(3);
  testCountConcrete<int32_t, Variadic<Array<int32_t>>>(3);
  testCountConcrete<Variadic<int32_t>, Generic<>>(1);
  testCountConcrete<Generic<>, Variadic<int32_t>>(1);
  testCountConcrete<Variadic<Generic<>>, int32_t>(1);
  testCountConcrete<int32_t, Variadic<Array<Generic<>>>>(2);
}

// Verifies the string rendered for each analyzed type, one expected entry per
// template argument.
TEST_F(TypeAnalysisTest, testStringType) {
  // Primitive types.
  testStringType<int32_t>({"integer"});
  testStringType<int64_t>({"bigint"});
  testStringType<float>({"real"});
  testStringType<double>({"double"});

  // Generics: the unnamed one renders as "any", a named one as its variable.
  testStringType<Generic<>>({"any"});
  testStringType<Generic<T1>>({"__user_T1"});

  // Containers and Variadic (which renders as its wrapped type).
  testStringType<Array<int32_t>>({"array(integer)"});
  testStringType<Map<Generic<>, int32_t>>({"map(any,integer)"});
  testStringType<Variadic<int32_t>>({"integer"});

  // Several arguments: each gets its own independently rendered string.
  testStringType<int32_t, int64_t, Map<Array<int32_t>, Generic<T2>>>(
      {"integer", "bigint", "map(array(integer),__user_T2)"});
}

// Verifies which generic variable names are collected: only named generics
// (Generic<Tn>) produce a variable; concrete types and Generic<> do not.
TEST_F(TypeAnalysisTest, testVariables) {
  // Nothing to collect.
  testVariables<int32_t>({});
  testVariables<Array<int32_t>>({});
  testVariables<Variadic<int32_t>>({});
  testVariables<Generic<>>({});
  testVariables<Map<Generic<>, int32_t>>({});

  // Named generics, at top level and nested, across several arguments.
  testVariables<Generic<T1>>({"__user_T1"});
  testVariables<int32_t, Generic<T5>, Map<Array<int32_t>, Generic<T2>>>(
      {"__user_T2", "__user_T5"});
}

} // namespace
} // namespace facebook::velox::core