From b773d0ded78fae94ce0cfe4a874ebf50a34696b0 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 18 Nov 2016 15:58:31 -0500 Subject: [PATCH 1/7] Add gflags external project and json-integration-test executable stub Change-Id: I156149572f7913ff47db6adaba7ddb579c706a6e --- cpp/CMakeLists.txt | 43 ++++++++++++++++ cpp/cmake_modules/FindGFlags.cmake | 60 ++++++++++++++++++++++ cpp/src/arrow/ipc/CMakeLists.txt | 21 ++++++++ cpp/src/arrow/ipc/json-integration-test.cc | 51 ++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 cpp/cmake_modules/FindGFlags.cmake create mode 100644 cpp/src/arrow/ipc/json-integration-test.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0bff7528578..d2fa8467bfa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -26,6 +26,7 @@ include(ExternalProject) set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty") +set(GFLAGS_VERSION "2.1.2") set(GTEST_VERSION "1.7.0") set(GBENCHMARK_VERSION "1.0.0") set(FLATBUFFERS_VERSION "1.3.0") @@ -506,6 +507,48 @@ if(ARROW_BUILD_TESTS) if(GTEST_VENDORED) add_dependencies(gtest googletest_ep) endif() + + # gflags (formerly Googleflags) command line parsing + if("$ENV{GFLAGS_HOME}" STREQUAL "") + if(APPLE) + set(GFLAGS_CMAKE_CXX_FLAGS "-fPIC -std=c++11 -stdlib=libc++") + else() + set(GFLAGS_CMAKE_CXX_FLAGS "-fPIC") + endif() + + set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") + ExternalProject_Add(gflags_ep + URL "https://github.com/gflags/gflags/archive/v${GFLAGS_VERSION}.tar.gz" + BUILD_IN_SOURCE 1 + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX} + -DBUILD_SHARED_LIBS=OFF + -DBUILD_STATIC_LIBS=ON + -DBUILD_PACKAGING=OFF + -DBUILD_TESTING=OFF + -DBUILD_NC_TESTS=OFF + -BUILD_CONFIG_TESTS=OFF + -DINSTALL_HEADERS=ON + -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) + + set(GFLAGS_HOME "${GFLAGS_PREFIX}") + set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include") + set(GFLAGS_STATIC_LIB "${GFLAGS_PREFIX}/lib/libgflags.a") + set(GFLAGS_VENDORED 1) + else() + set(GFLAGS_VENDORED 0) + find_package(GFlags REQUIRED) + endif() + + message(STATUS "GFlags include dir: ${GFLAGS_INCLUDE_DIR}") + message(STATUS "GFlags static library: ${GFLAGS_STATIC_LIB}") + include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) + ADD_THIRDPARTY_LIB(gflags + STATIC_LIB ${GFLAGS_STATIC_LIB}) + + if(GFLAGS_VENDORED) + add_dependencies(gflags gflags_ep) + endif() endif() if(ARROW_BUILD_BENCHMARKS) diff --git a/cpp/cmake_modules/FindGFlags.cmake b/cpp/cmake_modules/FindGFlags.cmake new file mode 100644 index 00000000000..eaea8353054 --- /dev/null +++ b/cpp/cmake_modules/FindGFlags.cmake @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# - Find GFLAGS (gflags.h, libgflags.a, libgflags.so, and libgflags.so.0) +# This module defines +# GFLAGS_INCLUDE_DIR, directory containing headers +# GFLAGS_SHARED_LIB, path to libgflags shared library +# GFLAGS_STATIC_LIB, path to libgflags static library +# GFLAGS_FOUND, whether gflags has been found + +if( NOT "$ENV{GFLAGS_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "$ENV{GFLAGS_HOME}" _native_path ) + list( APPEND _gflags_roots ${_native_path} ) +elseif ( GFlags_HOME ) + list( APPEND _gflags_roots ${GFlags_HOME} ) +endif() + +if ( _gflags_roots ) + find_path(GFLAGS_INCLUDE_DIR NAMES gflags/gflags.h + PATHS ${_gflags_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "include" ) + find_library(GFLAGS_SHARED_LIB NAMES gflags + PATHS ${_gflags_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib" ) + find_library(GFLAGS_SHARED_LIB NAMES libgflags.a + PATHS ${_gflags_roots} + NO_DEFAULT_PATH + PATH_SUFFIXES "lib" ) +else() + find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h + # make sure we don't accidentally pick up a different version + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) + find_library(GFLAGS_SHARED_LIB gflags + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) + find_library(GFLAGS_STATIC_LIB libgflags.a + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(GFLAGS REQUIRED_VARS + GFLAGS_SHARED_LIB GFLAGS_STATIC_LIB GFLAGS_INCLUDE_DIR) diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 6955bcb6c23..58f87f42751 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -85,6 +85,27 @@ ADD_ARROW_TEST(ipc-json-test) ARROW_TEST_LINK_LIBRARIES(ipc-json-test ${ARROW_IPC_TEST_LINK_LIBS}) +add_executable(json-integration-test json-integration-test.cc) + +if (APPLE) + target_link_libraries(json-integration-test + arrow_static + arrow_io + arrow_ipc + gflags + dl) + set_target_properties(json-integration-test + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") +else() + target_link_libraries(json-integration-test + arrow_static + arrow_io + arrow_ipc + gflags + pthread + dl) +endif() + # make clean will delete the generated file set_source_files_properties(Metadata_generated.h PROPERTIES GENERATED TRUE) diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc new file mode 100644 index 00000000000..15562be4a5a --- /dev/null +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" + +#include "arrow/array.h" +#include "arrow/ipc/json.h" +#include "arrow/table.h" +#include "arrow/test-util.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/types/primitive.h" +#include "arrow/types/string.h" +#include "arrow/types/struct.h" +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" + +DEFINE_string(infile, "", "Input file name"); +DEFINE_string(outfile, "", "Output file name"); +DEFINE_string(mode, "VALIDATE", + "Mode of integration testing tool (ARROW_TO_JSON, JSON_TO_ARROW, VALIDATE)"); + +namespace arrow { + +} // namespace arrow + +int main(int argc, char** argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); +} From 1752249b9253cd05da690c83d3b3d1c35fb95581 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 20 Nov 2016 16:29:48 -0500 Subject: [PATCH 2/7] Draft integration testing CLI tool, untested Change-Id: Ic0b5d44bc2ab04a1a4b6a2dabf3c932b39249787 --- cpp/src/arrow/io/file.cc | 7 +- cpp/src/arrow/ipc/CMakeLists.txt | 2 + cpp/src/arrow/ipc/adapter.h | 2 +- cpp/src/arrow/ipc/json-integration-test.cc | 185 ++++++++++++++++++++- 4 files changed, 190 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 93f0ad91ee8..05fa6663e33 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -186,12 +186,13 @@ static inline Status FileOpenWriteable(const std::string& filename, int* fd) { memcpy(wpath.data(), filename.data(), filename.size()); memcpy(wpath.data() + nwchars, L"\0", sizeof(wchar_t)); - errno_actual = _wsopen_s( - fd, wpath.data(), _O_WRONLY | _O_CREAT | _O_BINARY, _SH_DENYNO, _S_IWRITE); + errno_actual = _wsopen_s(fd, wpath.data(), _O_WRONLY | _O_CREAT | _O_BINARY | _O_TRUNC, + _SH_DENYNO, _S_IWRITE); ret = *fd; #else - ret = *fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_BINARY, ARROW_WRITE_SHMODE); + ret = *fd = + open(filename.c_str(), O_WRONLY | O_CREAT | O_BINARY | O_TRUNC, ARROW_WRITE_SHMODE); #endif return CheckOpenResult(ret, errno_actual, filename.c_str(), filename.size()); } diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 58f87f42751..2e9487f92d9 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -93,6 +93,7 @@ if (APPLE) arrow_io arrow_ipc gflags + gtest dl) set_target_properties(json-integration-test PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") @@ -102,6 +103,7 @@ else() arrow_io arrow_ipc gflags + gtest pthread dl) endif() diff --git a/cpp/src/arrow/ipc/adapter.h b/cpp/src/arrow/ipc/adapter.h index 3fde18dde83..b02de284dfc 100644 --- a/cpp/src/arrow/ipc/adapter.h +++ b/cpp/src/arrow/ipc/adapter.h @@ -16,7 +16,7 @@ // under the License. // Public API for writing and accessing (with zero copy, if possible) Arrow -// data in shared memory +// IPC binary formatted data (e.g. in shared memory, or from some other IO source) #ifndef ARROW_IPC_ADAPTER_H #define ARROW_IPC_ADAPTER_H diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 15562be4a5a..f6a2fe655a1 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -17,15 +17,20 @@ #include #include +#include #include +#include #include #include #include #include #include "gflags/gflags.h" +#include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/io/file.h" +#include "arrow/ipc/file.h" #include "arrow/ipc/json.h" #include "arrow/table.h" #include "arrow/test-util.h" @@ -37,15 +42,191 @@ #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" -DEFINE_string(infile, "", "Input file name"); -DEFINE_string(outfile, "", "Output file name"); +DEFINE_string(arrow, "", "Arrow file name"); +DEFINE_string(json, "", "JSON file name"); DEFINE_string(mode, "VALIDATE", "Mode of integration testing tool (ARROW_TO_JSON, JSON_TO_ARROW, VALIDATE)"); +DEFINE_bool(unittest, false, "Run integration test self unit tests"); namespace arrow { +bool file_exists(const char* path) { + std::ifstream handle(path); + return handle.good(); +} + +// Convert JSON file to IPC binary format +static Status ConvertJsonToArrow( + const std::string& json_path, const std::string& arrow_path) { + std::shared_ptr in_file; + std::shared_ptr out_file; + + RETURN_NOT_OK(io::ReadableFile::Open(json_path, &in_file)); + RETURN_NOT_OK(io::FileOutputStream::Open(arrow_path, &out_file)); + + int64_t file_size = 0; + RETURN_NOT_OK(in_file->GetSize(&file_size)); + + std::shared_ptr json_buffer; + RETURN_NOT_OK(in_file->Read(file_size, &json_buffer)); + + std::unique_ptr reader; + RETURN_NOT_OK(ipc::JsonReader::Open(json_buffer, &reader)); + + std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + + std::shared_ptr writer; + RETURN_NOT_OK(ipc::FileWriter::Open(out_file.get(), reader->schema(), &writer)); + + for (int i = 0; i < reader->num_record_batches(); ++i) { + std::shared_ptr batch; + RETURN_NOT_OK(reader->GetRecordBatch(i, &batch)); + RETURN_NOT_OK(writer->WriteRecordBatch(batch->columns(), batch->num_rows())); + } + return writer->Close(); +} + +// Convert IPC binary format to JSON +static Status ConvertArrowToJson( + const std::string& arrow_path, const std::string& json_path) { + std::shared_ptr in_file; + std::shared_ptr out_file; + + RETURN_NOT_OK(io::ReadableFile::Open(arrow_path, &in_file)); + RETURN_NOT_OK(io::FileOutputStream::Open(json_path, &out_file)); + + std::shared_ptr reader; + RETURN_NOT_OK(ipc::FileReader::Open(in_file, &reader)); + + std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + + std::unique_ptr writer; + RETURN_NOT_OK(ipc::JsonWriter::Open(reader->schema(), &writer)); + + for (int i = 0; i < reader->num_record_batches(); ++i) { + std::shared_ptr batch; + RETURN_NOT_OK(reader->GetRecordBatch(i, &batch)); + RETURN_NOT_OK(writer->WriteRecordBatch(batch->columns(), batch->num_rows())); + } + + std::string result; + RETURN_NOT_OK(writer->Finish(&result)); + return out_file->Write(reinterpret_cast(result.c_str()), + static_cast(result.size())); +} + +static Status ValidateArrowVsJson( + const std::string& arrow_path, const std::string& json_path) { + // Construct JSON reader + std::shared_ptr json_file; + RETURN_NOT_OK(io::ReadableFile::Open(json_path, &json_file)); + + int64_t file_size = 0; + RETURN_NOT_OK(json_file->GetSize(&file_size)); + + std::shared_ptr json_buffer; + RETURN_NOT_OK(json_file->Read(file_size, &json_buffer)); + + std::unique_ptr json_reader; + RETURN_NOT_OK(ipc::JsonReader::Open(json_buffer, &json_reader)); + + // Construct Arrow reader + std::shared_ptr arrow_file; + RETURN_NOT_OK(io::ReadableFile::Open(arrow_path, &arrow_file)); + + std::shared_ptr arrow_reader; + RETURN_NOT_OK(ipc::FileReader::Open(arrow_file, &arrow_reader)); + + auto json_schema = json_reader->schema(); + auto arrow_schema = arrow_reader->schema(); + + if (!json_schema->Equals(arrow_schema)) { + std::stringstream ss; + ss << "JSON schema: \n" + << json_schema->ToString() << "\n" + << "Arrow schema: \n" + << arrow_schema->ToString(); + + std::cout << ss.str() << std::endl; + return Status::Invalid("Schemas did not match"); + } + + const int json_nbatches = json_reader->num_record_batches(); + const int arrow_nbatches = arrow_reader->num_record_batches(); + + if (json_nbatches != arrow_nbatches) { + std::stringstream ss; + ss << "Different number of record batches: " << json_nbatches << " (JSON) vs " + << arrow_nbatches << " (Arrow)"; + return Status::Invalid(ss.str()); + } + + std::shared_ptr arrow_batch; + std::shared_ptr json_batch; + for (int i = 0; i < json_nbatches; ++i) { + RETURN_NOT_OK(json_reader->GetRecordBatch(i, &json_batch)); + RETURN_NOT_OK(arrow_reader->GetRecordBatch(i, &arrow_batch)); + + if (!json_batch->Equals(*arrow_batch.get())) { + std::stringstream ss; + ss << "Record batch " << i << " did not match"; + return Status::Invalid(ss.str()); + } + } + + return Status::OK(); +} + +Status RunCommand(const std::string& json_path, const std::string& arrow_path, + const std::string& command) { + if (json_path == "") { return Status::Invalid("Must specify json file name"); } + + if (arrow_path == "") { return Status::Invalid("Must specify arrow file name"); } + + if (command == "ARROW_TO_JSON") { + if (!file_exists(arrow_path.c_str())) { + return Status::Invalid("Input file does not exist"); + } + + return ConvertArrowToJson(arrow_path, json_path); + } else if (command == "JSON_TO_ARROW") { + if (!file_exists(json_path.c_str())) { + return Status::Invalid("Input file does not exist"); + } + + return ConvertJsonToArrow(json_path, arrow_path); + } else if (command == "VALIDATE") { + if (!file_exists(json_path.c_str())) { + return Status::Invalid("JSON file does not exist"); + } + + if (!file_exists(arrow_path.c_str())) { + return Status::Invalid("Arrow file does not exist"); + } + + return ValidateArrowVsJson(arrow_path, json_path); + } else { + std::stringstream ss; + ss << "Unknown command: " << command; + return Status::Invalid(ss.str()); + } +} + } // namespace arrow int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); + + if (FLAGS_unittest) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } + + arrow::Status result = arrow::RunCommand(FLAGS_json, FLAGS_arrow, FLAGS_mode); + if (!result.ok()) { + std::cout << "Error message: " << result.ToString() << std::endl; + return 1; + } + + return 0; } From ca1eade1882870c8d8c366d75b32c6e6f7ec6388 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 20 Nov 2016 16:31:52 -0500 Subject: [PATCH 3/7] Clean up includes Change-Id: I0d915b0309ba1515422d23d9ec738bbe3ec0a1f8 --- cpp/src/arrow/ipc/json-integration-test.cc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index f6a2fe655a1..996e94b159c 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include @@ -23,23 +22,15 @@ #include #include #include -#include #include "gflags/gflags.h" #include "gtest/gtest.h" -#include "arrow/array.h" #include "arrow/io/file.h" #include "arrow/ipc/file.h" #include "arrow/ipc/json.h" +#include "arrow/schema.h" #include "arrow/table.h" -#include "arrow/test-util.h" -#include "arrow/type.h" -#include "arrow/type_traits.h" -#include "arrow/types/primitive.h" -#include "arrow/types/string.h" -#include "arrow/types/struct.h" -#include "arrow/util/memory-pool.h" #include "arrow/util/status.h" DEFINE_string(arrow, "", "Arrow file name"); From dbc6aab2c0df7845abd8b55ecc53c9dbe231b5a1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 21 Nov 2016 12:24:44 -0500 Subject: [PATCH 4/7] Add unit tests for the integration validator Change-Id: Ie39ca80405bad788f306c38853fbb5e768b5a3ef --- ci/travis_script_cpp.sh | 3 + cpp/src/arrow/ipc/CMakeLists.txt | 4 + cpp/src/arrow/ipc/json-integration-test.cc | 161 ++++++++++++++++++++- 3 files changed, 165 insertions(+), 3 deletions(-) diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh index d555cab3e64..52b5c85e2f3 100755 --- a/ci/travis_script_cpp.sh +++ b/ci/travis_script_cpp.sh @@ -34,4 +34,7 @@ make lint ctest -VV -L unittest +# Also run the integration tests self tests +debug/json-integration-test --unittest --verbose + popd diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 2e9487f92d9..329c6f88f0c 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -94,6 +94,8 @@ if (APPLE) arrow_ipc gflags gtest + boost_filesystem_static + boost_system_static dl) set_target_properties(json-integration-test PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") @@ -105,6 +107,8 @@ else() gflags gtest pthread + boost_filesystem_static + boost_system_static dl) endif() diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 996e94b159c..d8a3c105ef0 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -26,11 +26,14 @@ #include "gflags/gflags.h" #include "gtest/gtest.h" +#include // NOLINT + #include "arrow/io/file.h" #include "arrow/ipc/file.h" #include "arrow/ipc/json.h" #include "arrow/schema.h" #include "arrow/table.h" +#include "arrow/test-util.h" #include "arrow/util/status.h" DEFINE_string(arrow, "", "Arrow file name"); @@ -38,6 +41,9 @@ DEFINE_string(json, "", "JSON file name"); DEFINE_string(mode, "VALIDATE", "Mode of integration testing tool (ARROW_TO_JSON, JSON_TO_ARROW, VALIDATE)"); DEFINE_bool(unittest, false, "Run integration test self unit tests"); +DEFINE_bool(verbose, false, "Verbose output"); + +namespace fs = boost::filesystem; namespace arrow { @@ -64,7 +70,9 @@ static Status ConvertJsonToArrow( std::unique_ptr reader; RETURN_NOT_OK(ipc::JsonReader::Open(json_buffer, &reader)); - std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + if (FLAGS_verbose) { + std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + } std::shared_ptr writer; RETURN_NOT_OK(ipc::FileWriter::Open(out_file.get(), reader->schema(), &writer)); @@ -89,7 +97,9 @@ static Status ConvertArrowToJson( std::shared_ptr reader; RETURN_NOT_OK(ipc::FileReader::Open(in_file, &reader)); - std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + if (FLAGS_verbose) { + std::cout << "Found schema: " << reader->schema()->ToString() << std::endl; + } std::unique_ptr writer; RETURN_NOT_OK(ipc::JsonWriter::Open(reader->schema(), &writer)); @@ -138,7 +148,9 @@ static Status ValidateArrowVsJson( << "Arrow schema: \n" << arrow_schema->ToString(); - std::cout << ss.str() << std::endl; + if (FLAGS_verbose) { + std::cout << ss.str() << std::endl; + } return Status::Invalid("Schemas did not match"); } @@ -203,6 +215,149 @@ Status RunCommand(const std::string& json_path, const std::string& arrow_path, } } +static std::string temp_path() { + return (fs::temp_directory_path() / fs::unique_path()).native(); +} + +class TestJSONIntegration : public ::testing::Test { + public: + void SetUp() {} + + std::string mkstemp() { + auto path = temp_path(); + tmp_paths_.push_back(path); + return path; + } + + Status WriteJson(const char* data, const std::string& path) { + do { + std::shared_ptr out; + RETURN_NOT_OK(io::FileOutputStream::Open(path, &out)); + RETURN_NOT_OK(out->Write(reinterpret_cast(data), + static_cast(strlen(data)))); + } while (0); + return Status::OK(); + } + + void TearDown() { + for (const std::string path : tmp_paths_) { + std::remove(path.c_str()); + } + } + + protected: + std::vector tmp_paths_; +}; + +static const char* JSON_EXAMPLE = R"example( +{ + "schema": { + "fields": [ + { + "name": "foo", + "type": {"name": "int", "isSigned": true, "bitWidth": 32}, + "nullable": true, "children": [], + "typeLayout": [ + {"type": "VALIDITY", "typeBitWidth": 1}, + {"type": "DATA", "typeBitWidth": 32} + ] + }, + { + "name": "bar", + "type": {"name": "floatingpoint", "precision": "DOUBLE"}, + "nullable": true, "children": [], + "typeLayout": [ + {"type": "VALIDITY", "typeBitWidth": 1}, + {"type": "DATA", "typeBitWidth": 64} + ] + } + ] + }, + "batches": [ + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "DATA": [1, 2, 3, 4, 5], + "VALIDITY": [1, 0, 1, 1, 1] + }, + { + "name": "bar", + "count": 5, + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0], + "VALIDITY": [1, 0, 0, 1, 1] + } + ] + } + ] +} +)example"; + +static const char* JSON_EXAMPLE2 = R"example( +{ + "schema": { + "fields": [ + { + "name": "foo", + "type": {"name": "int", "isSigned": true, "bitWidth": 32}, + "nullable": true, "children": [], + "typeLayout": [ + {"type": "VALIDITY", "typeBitWidth": 1}, + {"type": "DATA", "typeBitWidth": 32} + ] + } + ] + }, + "batches": [ + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "DATA": [1, 2, 3, 4, 5], + "VALIDITY": [1, 0, 1, 1, 1] + } + ] + } + ] +} +)example"; + +TEST_F(TestJSONIntegration, ConvertAndValidate) { + std::string json_path = this->mkstemp(); + std::string arrow_path = this->mkstemp(); + + ASSERT_OK(WriteJson(JSON_EXAMPLE, json_path)); + + ASSERT_OK(ConvertJsonToArrow(json_path, arrow_path)); + ASSERT_OK(ValidateArrowVsJson(arrow_path, json_path)); + + // Convert and overwrite + ASSERT_OK(ConvertArrowToJson(arrow_path, json_path)); + + // Convert back to arrow, and validate + ASSERT_OK(ConvertJsonToArrow(json_path, arrow_path)); + ASSERT_OK(ValidateArrowVsJson(arrow_path, json_path)); +} + +TEST_F(TestJSONIntegration, ErrorStates) { + std::string json_path = this->mkstemp(); + std::string json_path2 = this->mkstemp(); + std::string arrow_path = this->mkstemp(); + + ASSERT_OK(WriteJson(JSON_EXAMPLE, json_path)); + ASSERT_OK(WriteJson(JSON_EXAMPLE2, json_path2)); + + ASSERT_OK(ConvertJsonToArrow(json_path, arrow_path)); + ASSERT_RAISES(Invalid, ValidateArrowVsJson(arrow_path, json_path2)); + + ASSERT_RAISES(IOError, ValidateArrowVsJson("does_not_exist-1234", json_path2)); + ASSERT_RAISES(IOError, ValidateArrowVsJson(arrow_path, "does_not_exist-1234")); +} + } // namespace arrow int main(int argc, char** argv) { From bdf1f7a0171429db4d9fdc886691e61fb91984d7 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 21 Nov 2016 12:28:53 -0500 Subject: [PATCH 5/7] Call the RunCommand method instead Change-Id: If5af704126a30a45ad7b785f82f70d2da126b32a --- cpp/src/arrow/ipc/json-integration-test.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index d8a3c105ef0..89697e936a9 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -332,15 +332,15 @@ TEST_F(TestJSONIntegration, ConvertAndValidate) { ASSERT_OK(WriteJson(JSON_EXAMPLE, json_path)); - ASSERT_OK(ConvertJsonToArrow(json_path, arrow_path)); - ASSERT_OK(ValidateArrowVsJson(arrow_path, json_path)); + ASSERT_OK(RunCommand(json_path, arrow_path, "JSON_TO_ARROW")); + ASSERT_OK(RunCommand(json_path, arrow_path, "VALIDATE")); // Convert and overwrite - ASSERT_OK(ConvertArrowToJson(arrow_path, json_path)); + ASSERT_OK(RunCommand(json_path, arrow_path, "ARROW_TO_JSON")); // Convert back to arrow, and validate - ASSERT_OK(ConvertJsonToArrow(json_path, arrow_path)); - ASSERT_OK(ValidateArrowVsJson(arrow_path, json_path)); + ASSERT_OK(RunCommand(json_path, arrow_path, "JSON_TO_ARROW")); + ASSERT_OK(RunCommand(json_path, arrow_path, "VALIDATE")); } TEST_F(TestJSONIntegration, ErrorStates) { @@ -356,6 +356,9 @@ TEST_F(TestJSONIntegration, ErrorStates) { ASSERT_RAISES(IOError, ValidateArrowVsJson("does_not_exist-1234", json_path2)); ASSERT_RAISES(IOError, ValidateArrowVsJson(arrow_path, "does_not_exist-1234")); + + ASSERT_RAISES(Invalid, RunCommand("", arrow_path, "VALIDATE")); + ASSERT_RAISES(Invalid, RunCommand(json_path, "", "VALIDATE")); } } // namespace arrow From f563d3a1da15d40148defbe3e5a925d1cdf74fa1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 21 Nov 2016 12:43:25 -0500 Subject: [PATCH 6/7] Build integration test as a normal unit test executable, opt-in to integration testing Change-Id: I5cad36511c88dd7d6b8a05d4f58d8ee98da35e3f --- ci/travis_script_cpp.sh | 3 --- cpp/src/arrow/ipc/CMakeLists.txt | 2 +- cpp/src/arrow/ipc/json-integration-test.cc | 19 +++++++++---------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh index 52b5c85e2f3..d555cab3e64 100755 --- a/ci/travis_script_cpp.sh +++ b/ci/travis_script_cpp.sh @@ -34,7 +34,4 @@ make lint ctest -VV -L unittest -# Also run the integration tests self tests -debug/json-integration-test --unittest --verbose - popd diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 329c6f88f0c..f9e7cf792ae 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -85,7 +85,7 @@ ADD_ARROW_TEST(ipc-json-test) ARROW_TEST_LINK_LIBRARIES(ipc-json-test ${ARROW_IPC_TEST_LINK_LIBS}) -add_executable(json-integration-test json-integration-test.cc) +ADD_ARROW_TEST(json-integration-test) if (APPLE) target_link_libraries(json-integration-test diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 89697e936a9..6c30820f4ad 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -40,8 +40,8 @@ DEFINE_string(arrow, "", "Arrow file name"); DEFINE_string(json, "", "JSON file name"); DEFINE_string(mode, "VALIDATE", "Mode of integration testing tool (ARROW_TO_JSON, JSON_TO_ARROW, VALIDATE)"); -DEFINE_bool(unittest, false, "Run integration test self unit tests"); -DEFINE_bool(verbose, false, "Verbose output"); +DEFINE_bool(integration, false, "Run in integration test mode"); +DEFINE_bool(verbose, true, "Verbose output"); namespace fs = boost::filesystem; @@ -366,16 +366,15 @@ TEST_F(TestJSONIntegration, ErrorStates) { int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_unittest) { + if (FLAGS_integration) { + arrow::Status result = arrow::RunCommand(FLAGS_json, FLAGS_arrow, FLAGS_mode); + if (!result.ok()) { + std::cout << "Error message: " << result.ToString() << std::endl; + return 1; + } + } else { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - - arrow::Status result = arrow::RunCommand(FLAGS_json, FLAGS_arrow, FLAGS_mode); - if (!result.ok()) { - std::cout << "Error message: " << result.ToString() << std::endl; - return 1; - } - return 0; } From 7b29b24fc3c597d2f1ca94b8d03370107f00287d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 21 Nov 2016 15:03:19 -0500 Subject: [PATCH 7/7] Use git master gflags to avoid valgrind error Change-Id: Ib36148918bc08435e40edea4f6823c038c8c7075 --- cpp/CMakeLists.txt | 5 +++-- cpp/src/arrow/ipc/json-integration-test.cc | 17 +++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d2fa8467bfa..839ea17dee0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -518,7 +518,9 @@ if(ARROW_BUILD_TESTS) set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") ExternalProject_Add(gflags_ep - URL "https://github.com/gflags/gflags/archive/v${GFLAGS_VERSION}.tar.gz" + GIT_REPOSITORY https://github.com/gflags/gflags.git + GIT_TAG cce68f0c9c5d054017425e6e6fd54f696d36e8ee + # URL "https://github.com/gflags/gflags/archive/v${GFLAGS_VERSION}.tar.gz" BUILD_IN_SOURCE 1 CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${GFLAGS_PREFIX} @@ -526,7 +528,6 @@ if(ARROW_BUILD_TESTS) -DBUILD_STATIC_LIBS=ON -DBUILD_PACKAGING=OFF -DBUILD_TESTING=OFF - -DBUILD_NC_TESTS=OFF -BUILD_CONFIG_TESTS=OFF -DINSTALL_HEADERS=ON -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 6c30820f4ad..5eff8998afb 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -148,9 +148,7 @@ static Status ValidateArrowVsJson( << "Arrow schema: \n" << arrow_schema->ToString(); - if (FLAGS_verbose) { - std::cout << ss.str() << std::endl; - } + if (FLAGS_verbose) { std::cout << ss.str() << std::endl; } return Status::Invalid("Schemas did not match"); } @@ -233,8 +231,8 @@ class TestJSONIntegration : public ::testing::Test { do { std::shared_ptr out; RETURN_NOT_OK(io::FileOutputStream::Open(path, &out)); - RETURN_NOT_OK(out->Write(reinterpret_cast(data), - static_cast(strlen(data)))); + RETURN_NOT_OK(out->Write( + reinterpret_cast(data), static_cast(strlen(data)))); } while (0); return Status::OK(); } @@ -366,15 +364,18 @@ TEST_F(TestJSONIntegration, ErrorStates) { int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); + int ret = 0; + if (FLAGS_integration) { arrow::Status result = arrow::RunCommand(FLAGS_json, FLAGS_arrow, FLAGS_mode); if (!result.ok()) { std::cout << "Error message: " << result.ToString() << std::endl; - return 1; + ret = 1; } } else { ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + ret = RUN_ALL_TESTS(); } - return 0; + gflags::ShutDownCommandLineFlags(); + return ret; }