From 5427c06c8c0683d84d5c5ea630659eaba2d9c473 Mon Sep 17 00:00:00 2001 From: zhejiangxiaomai Date: Tue, 26 Jul 2022 23:17:41 +0800 Subject: [PATCH] rebase facebook/velox 7.27 --- scripts/setup-helper-functions.sh | 2 +- third_party/CMakeLists.txt | 4 +++- velox/dwio/dwrf/common/Checksum.h | 2 +- velox/dwio/parquet/writer/Writer.cpp | 4 ++-- velox/external/duckdb/duckdb.hpp | 2 +- velox/substrait/CMakeLists.txt | 2 +- velox/substrait/SubstraitToVeloxPlan.cpp | 20 +++++++++++-------- velox/substrait/proto/substrait/algebra.proto | 4 ++-- .../substrait/proto/substrait/function.proto | 6 +++--- velox/substrait/tests/CMakeLists.txt | 1 + .../Substrait2VeloxPlanConversionTest.cpp | 18 ++++++++--------- velox/vector/arrow/c/Bridge.cpp | 2 +- 12 files changed, 37 insertions(+), 30 deletions(-) diff --git a/scripts/setup-helper-functions.sh b/scripts/setup-helper-functions.sh index 6a3ef974eae1..fe27d28afbdb 100644 --- a/scripts/setup-helper-functions.sh +++ b/scripts/setup-helper-functions.sh @@ -100,7 +100,7 @@ function get_cxx_flags { ;; "avx") - echo -n "-mavx2 -mfma -mavx -mf16c -mlzcnt -std=c++17 -mbmi2 -mbmi" + echo -n "-mavx2 -mfma -mavx -mf16c -mlzcnt -std=c++17" ;; "sse") diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 2ae6948514df..ee9e4f8e27ce 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -39,7 +39,9 @@ if(VELOX_ENABLE_ARROW) -DARROW_WITH_UTF8PROC=OFF -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install -DARROW_BUILD_STATIC=ON - -DThrift_SOURCE=${THRIFT_SOURCE}) + -DThrift_SOURCE=BUNDLED + -DARROW_DEPENDENCY_SOURCE=BUNDLED + -Dre2_SOURCE=AUTO) set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR}) add_library(thrift STATIC IMPORTED GLOBAL) diff --git a/velox/dwio/dwrf/common/Checksum.h b/velox/dwio/dwrf/common/Checksum.h index 6ca7d205d99a..611c671f921b 100644 --- a/velox/dwio/dwrf/common/Checksum.h +++ b/velox/dwio/dwrf/common/Checksum.h @@ -21,7 +21,7 @@ #include #define XXH_INLINE_ALL -#include +#include "velox/external/xxhash/xxhash.h" namespace facebook::velox::dwrf { diff --git a/velox/dwio/parquet/writer/Writer.cpp b/velox/dwio/parquet/writer/Writer.cpp index eb723ff0d7ff..b9bf70d32c40 100644 --- a/velox/dwio/parquet/writer/Writer.cpp +++ b/velox/dwio/parquet/writer/Writer.cpp @@ -15,9 +15,9 @@ */ #include "velox/dwio/parquet/writer/Writer.h" -#include // @manual +#include #include // @manual -#include "velox/vector/arrow/Bridge.h" +#include "velox/vector/arrow/c/Bridge.h" namespace facebook::velox::parquet { diff --git a/velox/external/duckdb/duckdb.hpp b/velox/external/duckdb/duckdb.hpp index 2664bdc54129..ffa3f2853cd8 100644 --- a/velox/external/duckdb/duckdb.hpp +++ b/velox/external/duckdb/duckdb.hpp @@ -397,7 +397,7 @@ namespace duckdb { DUCKDB_API void DuckDBAssertInternal(bool condition, const char *condition_name, const char *file, int linenr); } -#define D_ASSERT(condition) duckdb::DuckDBAssertInternal(bool(condition), #condition, __FILE__, __LINE__) +#define D_ASSERT(condition) ::duckdb::DuckDBAssertInternal(bool(condition), #condition, __FILE__, __LINE__) #endif diff --git a/velox/substrait/CMakeLists.txt b/velox/substrait/CMakeLists.txt index c9deb73af2f8..f15376b60ae6 100644 --- a/velox/substrait/CMakeLists.txt +++ b/velox/substrait/CMakeLists.txt @@ -33,7 +33,7 @@ get_filename_component(PROTO_DIR ${substrait_proto_directory}/, DIRECTORY) # Generate Substrait hearders add_custom_command( OUTPUT ${PROTO_OUTPUT_FILES} - COMMAND protoc --proto_path ${CMAKE_SOURCE_DIR}/ --cpp_out ${CMAKE_SOURCE_DIR} + COMMAND protoc --proto_path ${proto_directory}/ --cpp_out ${PROTO_OUTPUT_DIR} ${PROTO_FILES} DEPENDS ${PROTO_DIR} COMMENT "Running PROTO compiler" diff --git a/velox/substrait/SubstraitToVeloxPlan.cpp b/velox/substrait/SubstraitToVeloxPlan.cpp index 3ca64662ab03..abc8311fe49f 100644 --- a/velox/substrait/SubstraitToVeloxPlan.cpp +++ b/velox/substrait/SubstraitToVeloxPlan.cpp @@ -166,7 +166,7 @@ std::shared_ptr SubstraitVeloxPlanConverter::toVeloxPlan( joinType = core::JoinType::kRight; break; case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_SEMI: - joinType = core::JoinType::kSemi; + joinType = core::JoinType::kLeftSemi; break; case ::substrait::JoinRel_JoinType::JoinRel_JoinType_JOIN_TYPE_ANTI: joinType = core::JoinType::kAnti; @@ -400,6 +400,8 @@ std::shared_ptr SubstraitVeloxPlanConverter::toVeloxPlan( // Parse local files and construct split info. if (sRead.has_local_files()) { + using SubstraitFileFormatCase = + ::substrait::ReadRel_LocalFiles_FileOrFiles::FileFormatCase; const auto& fileList = sRead.local_files().items(); splitInfo->paths.reserve(fileList.size()); splitInfo->starts.reserve(fileList.size()); @@ -410,13 +412,15 @@ std::shared_ptr SubstraitVeloxPlanConverter::toVeloxPlan( splitInfo->paths.emplace_back(file.uri_file()); splitInfo->starts.emplace_back(file.start()); splitInfo->lengths.emplace_back(file.length()); - auto format = file.format(); - if (format == 2 || format == 3) { - splitInfo->format = dwio::common::FileFormat::DWRF; - } else if (format == 1) { - splitInfo->format = dwio::common::FileFormat::PARQUET; - } else { - splitInfo->format = dwio::common::FileFormat::UNKNOWN; + switch (file.file_format_case()) { + case SubstraitFileFormatCase::kOrc: + splitInfo->format = dwio::common::FileFormat::DWRF; + break; + case SubstraitFileFormatCase::kParquet: + splitInfo->format = dwio::common::FileFormat::PARQUET; + break; + default: + splitInfo->format = dwio::common::FileFormat::UNKNOWN; } } } diff --git a/velox/substrait/proto/substrait/algebra.proto b/velox/substrait/proto/substrait/algebra.proto index 8dcc9c8d2110..7da4bf0f8b72 100644 --- a/velox/substrait/proto/substrait/algebra.proto +++ b/velox/substrait/proto/substrait/algebra.proto @@ -5,8 +5,8 @@ package substrait; import "google/protobuf/any.proto"; import "google/protobuf/empty.proto"; -import "velox/substrait/proto/substrait/extensions/extensions.proto"; -import "velox/substrait/proto/substrait/type.proto"; +import "substrait/extensions/extensions.proto"; +import "substrait/type.proto"; option cc_enable_arenas = true; option csharp_namespace = "Substrait.Protobuf"; diff --git a/velox/substrait/proto/substrait/function.proto b/velox/substrait/proto/substrait/function.proto index 613368f78d5e..0d09bef0eb8d 100644 --- a/velox/substrait/proto/substrait/function.proto +++ b/velox/substrait/proto/substrait/function.proto @@ -3,9 +3,9 @@ syntax = "proto3"; package substrait; -import "velox/substrait/proto/substrait/parameterized_types.proto"; -import "velox/substrait/proto/substrait/type.proto"; -import "velox/substrait/proto/substrait/type_expressions.proto"; +import "substrait/parameterized_types.proto"; +import "substrait/type.proto"; +import "substrait/type_expressions.proto"; option cc_enable_arenas = true; option csharp_namespace = "Substrait.Protobuf"; diff --git a/velox/substrait/tests/CMakeLists.txt b/velox/substrait/tests/CMakeLists.txt index a16fa6f2a6b6..eb1eea008682 100644 --- a/velox/substrait/tests/CMakeLists.txt +++ b/velox/substrait/tests/CMakeLists.txt @@ -31,6 +31,7 @@ target_link_libraries( velox_functions_test_lib velox_exec velox_dwio_common + velox_dwio_test_utils velox_aggregates velox_aggregates_test_lib velox_functions_lib diff --git a/velox/substrait/tests/Substrait2VeloxPlanConversionTest.cpp b/velox/substrait/tests/Substrait2VeloxPlanConversionTest.cpp index 40baebada64b..1e592d3ff823 100644 --- a/velox/substrait/tests/Substrait2VeloxPlanConversionTest.cpp +++ b/velox/substrait/tests/Substrait2VeloxPlanConversionTest.cpp @@ -431,16 +431,16 @@ TEST_F(Substrait2VeloxPlanConversionTest, q6FirstStageTest) { auto veloxConverter = std::make_shared(); genLineitemORC(veloxConverter); // Find and deserialize Substrait plan json file. - std::string planPath = + std::string subPlanPath = getDataFilePath("velox/substrait/tests", "data/q6_first_stage.json"); - - // Read q6_first_stage.json and resume the Substrait plan. - ::substrait::Plan substraitPlan; - JsonToProtoConverter::readFromFile(planPath, substraitPlan); - - // Convert to Velox PlanNode. - facebook::velox::substrait::SubstraitVeloxPlanConverter planConverter; - auto planNode = planConverter.toVeloxPlan(substraitPlan, pool_.get()); + auto resIter = veloxConverter->getResIter(subPlanPath); + while (resIter->HasNext()) { + auto rv = resIter->Next(); + auto size = rv->size(); + ASSERT_EQ(size, 1); + std::string res = rv->toString(0); + ASSERT_EQ(res, "{13613.1921}"); + } } // This test will firstly generate mock TPC-H lineitem ORC file. Then, Velox's diff --git a/velox/vector/arrow/c/Bridge.cpp b/velox/vector/arrow/c/Bridge.cpp index 69d63e938bd0..e11ed59f7bd4 100644 --- a/velox/vector/arrow/c/Bridge.cpp +++ b/velox/vector/arrow/c/Bridge.cpp @@ -21,7 +21,7 @@ #include "velox/common/base/Exceptions.h" #include "velox/vector/ComplexVector.h" #include "velox/vector/FlatVector.h" -#include "velox/vector/arrow/Abi.h" +#include "velox/vector/arrow/c/abi.h" namespace facebook::velox {