diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh index 20307736e67..73bdaeb81fe 100755 --- a/ci/travis_before_script_cpp.sh +++ b/ci/travis_before_script_cpp.sh @@ -26,8 +26,6 @@ CPP_DIR=$TRAVIS_BUILD_DIR/cpp CMAKE_COMMON_FLAGS="\ -DARROW_BUILD_BENCHMARKS=ON \ --DARROW_PARQUET=OFF \ --DARROW_HDFS=ON \ -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL" if [ $TRAVIS_OS_NAME == "linux" ]; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 93e9853df89..47ca2fd3765 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -74,10 +74,6 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Build the Arrow IPC extensions" ON) - option(ARROW_HDFS - "Build the Arrow IO extensions for the Hadoop file system" - OFF) - option(ARROW_BOOST_USE_SHARED "Rely on boost shared libraries where relevant" ON) diff --git a/cpp/src/arrow/io/CMakeLists.txt b/cpp/src/arrow/io/CMakeLists.txt index 2062cd43b7b..1e65a1a46ab 100644 --- a/cpp/src/arrow/io/CMakeLists.txt +++ b/cpp/src/arrow/io/CMakeLists.txt @@ -45,50 +45,30 @@ set(ARROW_IO_TEST_LINK_LIBS set(ARROW_IO_SRCS file.cc + hdfs.cc + hdfs-internal.cc interfaces.cc memory.cc ) -if(ARROW_HDFS) - if(NOT THIRDPARTY_DIR) - message(FATAL_ERROR "THIRDPARTY_DIR not set") - endif() - - if (DEFINED ENV{HADOOP_HOME}) - set(HADOOP_HOME $ENV{HADOOP_HOME}) - if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h") - message(STATUS "Did not find hdfs.h in expected location, using vendored one") - set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") - endif() - else() +# HDFS thirdparty setup +if (DEFINED ENV{HADOOP_HOME}) + set(HADOOP_HOME $ENV{HADOOP_HOME}) + if (NOT EXISTS "${HADOOP_HOME}/include/hdfs.h") + message(STATUS "Did not find hdfs.h in expected location, using vendored one") set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") endif() +else() + set(HADOOP_HOME "${THIRDPARTY_DIR}/hadoop") +endif() - set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h") - if (NOT EXISTS ${HDFS_H_PATH}) - message(FATAL_ERROR "Did not find 
hdfs.h at ${HDFS_H_PATH}") - endif() - message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH}) - message(STATUS "Building libhdfs shim component") - - include_directories(SYSTEM "${HADOOP_HOME}/include") - - set(ARROW_HDFS_SRCS - hdfs.cc - hdfs-internal.cc) - - set_property(SOURCE ${ARROW_HDFS_SRCS} - APPEND_STRING PROPERTY - COMPILE_FLAGS "-DHAS_HADOOP") - - set(ARROW_IO_SRCS - ${ARROW_HDFS_SRCS} - ${ARROW_IO_SRCS}) - - ADD_ARROW_TEST(io-hdfs-test) - ARROW_TEST_LINK_LIBRARIES(io-hdfs-test - ${ARROW_IO_TEST_LINK_LIBS}) +set(HDFS_H_PATH "${HADOOP_HOME}/include/hdfs.h") +if (NOT EXISTS ${HDFS_H_PATH}) + message(FATAL_ERROR "Did not find hdfs.h at ${HDFS_H_PATH}") endif() +message(STATUS "Found hdfs.h at: " ${HDFS_H_PATH}) + +include_directories(SYSTEM "${HADOOP_HOME}/include") add_library(arrow_io SHARED ${ARROW_IO_SRCS} @@ -119,6 +99,10 @@ ADD_ARROW_TEST(io-file-test) ARROW_TEST_LINK_LIBRARIES(io-file-test ${ARROW_IO_TEST_LINK_LIBS}) +ADD_ARROW_TEST(io-hdfs-test) +ARROW_TEST_LINK_LIBRARIES(io-hdfs-test + ${ARROW_IO_TEST_LINK_LIBS}) + ADD_ARROW_TEST(io-memory-test) ARROW_TEST_LINK_LIBRARIES(io-memory-test ${ARROW_IO_TEST_LINK_LIBS}) diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc index 7094785de02..e4b2cd55978 100644 --- a/cpp/src/arrow/io/hdfs-internal.cc +++ b/cpp/src/arrow/io/hdfs-internal.cc @@ -28,21 +28,7 @@ // This software may be modified and distributed under the terms // of the BSD license. See the LICENSE file for details. 
-#ifdef HAS_HADOOP - -#ifndef _WIN32 -#include -#else -#include -#include - -// TODO(wesm): address when/if we add windows support -// #include - -extern "C" { -#include -} +#include "arrow/io/hdfs-internal.h" #include #include @@ -53,7 +39,6 @@ extern "C" { #include // NOLINT -#include "arrow/io/hdfs-internal.h" #include "arrow/status.h" #include "arrow/util/visibility.h" @@ -265,7 +250,8 @@ static inline void* GetLibrarySymbol(void* handle, const char* symbol) { return dlsym(handle, symbol); #else - void* ret = reinterpret_cast(GetProcAddress(handle, symbol)); + void* ret = reinterpret_cast( + GetProcAddress(reinterpret_cast(handle), symbol)); if (ret == NULL) { // logstream(LOG_INFO) << "GetProcAddress error: " // << get_last_err_str(GetLastError()) << std::endl; @@ -537,7 +523,7 @@ Status LibHdfsShim::GetRequiredSymbols() { return Status::OK(); } -Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver) { +Status ConnectLibHdfs(LibHdfsShim** driver) { static std::mutex lock; std::lock_guard guard(lock); @@ -562,7 +548,7 @@ Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver) { return shim->GetRequiredSymbols(); } -Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver) { +Status ConnectLibHdfs3(LibHdfsShim** driver) { static std::mutex lock; std::lock_guard guard(lock); @@ -586,5 +572,3 @@ Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver) { } // namespace io } // namespace arrow - -#endif // HAS_HADOOP diff --git a/cpp/src/arrow/io/hdfs-internal.h b/cpp/src/arrow/io/hdfs-internal.h index 0ff118a8f57..8f9a06758cb 100644 --- a/cpp/src/arrow/io/hdfs-internal.h +++ b/cpp/src/arrow/io/hdfs-internal.h @@ -18,8 +18,25 @@ #ifndef ARROW_IO_HDFS_INTERNAL #define ARROW_IO_HDFS_INTERNAL +#ifndef _WIN32 +#include +#else + +// Windows defines min and max macros that mess up std::min/max +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include + +// TODO(wesm): address when/if we add windows support +// #include +#endif + +#include +#include 
"arrow/util/visibility.h" + namespace arrow { class Status; @@ -194,8 +211,9 @@ struct LibHdfsShim { Status GetRequiredSymbols(); }; -Status ConnectLibHdfs(LibHdfsShim** driver); -Status ConnectLibHdfs3(LibHdfsShim** driver); +// TODO(wesm): Remove these exports when we are linking statically +Status ARROW_EXPORT ConnectLibHdfs(LibHdfsShim** driver); +Status ARROW_EXPORT ConnectLibHdfs3(LibHdfsShim** driver); } // namespace io } // namespace arrow diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc index 4ef47b8babe..72e0ba8f298 100644 --- a/cpp/src/arrow/io/io-hdfs-test.cc +++ b/cpp/src/arrow/io/io-hdfs-test.cc @@ -79,7 +79,7 @@ class TestHdfsClient : public ::testing::Test { client_ = nullptr; scratch_dir_ = - boost::filesystem::unique_path("/tmp/arrow-hdfs/scratch-%%%%").native(); + boost::filesystem::unique_path("/tmp/arrow-hdfs/scratch-%%%%").string(); loaded_driver_ = false; diff --git a/cpp/src/arrow/ipc/json-integration-test.cc b/cpp/src/arrow/ipc/json-integration-test.cc index 5e593560f8c..757e6c00ab2 100644 --- a/cpp/src/arrow/ipc/json-integration-test.cc +++ b/cpp/src/arrow/ipc/json-integration-test.cc @@ -221,7 +221,7 @@ Status RunCommand(const std::string& json_path, const std::string& arrow_path, } static std::string temp_path() { - return (fs::temp_directory_path() / fs::unique_path()).native(); + return (fs::temp_directory_path() / fs::unique_path()).string(); } class TestJSONIntegration : public ::testing::Test {