Skip to content

Commit

Permalink
ARROW-4446: [C++][Python] Run Gandiva C++ unit tests in Appveyor, get…
Browse files Browse the repository at this point in the history
… build and tests working in Python

Resolves

* ARROW-4014 Fix warning about LIBCMT in MSVC build
* ARROW-4445 Run Gandiva C++ tests in Appveyor
* ARROW-4446 Run Gandiva Python unit tests in Appveyor
* ARROW-4404 Appveyor toolchain build does not actually build the project

Author: Wes McKinney <wesm+git@apache.org>

Closes #3567 from wesm/gandiva-python-windows and squashes the following commits:

d9e4b1b <Wes McKinney> Only use /nodefaultlib:libcmt if arrow_use_static_crt=OFF
a8f960d <Wes McKinney> Fix appyveyor.yml
72c97e2 <Wes McKinney> Re-enable all Appyveyor entries
a2e57d1 <Wes McKinney> Prevent move_shared_libs from clobbering the gandiva.cpp file which downstream code is expecting to find
d78ae13 <Wes McKinney> Debug prints
ac402f0 <Wes McKinney> Set language_level = 3
8ae63f1 <Wes McKinney> Fix LLVM 7 issues with MSVC, enable FindClangTools.cmake to find clang-format in conda env
3677115 <Wes McKinney> Do not do verbose linking
773c731 <Wes McKinney> Fixes
00d4126 <Wes McKinney> Disable failing Windows tests
063543c <Wes McKinney> Don't touch linker flags unless on MSVC
e19b688 <Wes McKinney> Upgrade to LLVM 7
707bc49 <Wes McKinney> Fix for gflags
5897045 <Wes McKinney> Retool FindGFlags.cmake a bit to try to fix Windows toolchain build
c2bb420 <Wes McKinney> Install all conda toolchain packages in one go
c285776 <Wes McKinney> Rearrange conda calls to see if fixes build
469a2df <Wes McKinney> Enable gandiva is one build entry
b05cf08 <Wes McKinney> Get Python build and tests for Gandiva working on, add to Appveyor build
  • Loading branch information
wesm committed Feb 8, 2019
1 parent 27ff2b3 commit 29f76df
Show file tree
Hide file tree
Showing 16 changed files with 163 additions and 76 deletions.
2 changes: 2 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ environment:
global:
USE_CLCACHE: true
ARROW_BUILD_GANDIVA: "OFF"
ARROW_LLVM_VERSION: "7.0.*"
PYTHON: "3.6"
ARCH: "64"

Expand All @@ -61,6 +62,7 @@ environment:
- JOB: "Toolchain"
GENERATOR: Visual Studio 14 2015 Win64
CONFIGURATION: "Release"
ARROW_BUILD_GANDIVA: "ON"
- JOB: "Static_Crt_Build"
GENERATOR: Ninja
- JOB: "Build_Debug"
Expand Down
31 changes: 11 additions & 20 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -95,33 +95,24 @@ if "%JOB%" == "Build_Debug" (
exit /B 0
)

conda create -n arrow -q -y -c conda-forge ^
--file=ci\conda_env_python.yml ^
python=%PYTHON% ^
numpy=1.14 ^
thrift-cpp=0.11 ^
boost-cpp

call activate arrow

set ARROW_LLVM_VERSION=6.0.1
set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14 thrift-cpp=0.11 boost-cpp

if "%ARROW_BUILD_GANDIVA%" == "ON" (
@rem Install llvmdev in the toolchain if building gandiva.dll
conda install -q -y llvmdev=%ARROW_LLVM_VERSION% || exit /B
set CONDA_PACKAGES=%CONDA_PACKAGES% llvmdev=%ARROW_LLVM_VERSION% clangdev=%ARROW_LLVM_VERSION%
)

@rem Use Boost from Anaconda
set BOOST_ROOT=%CONDA_PREFIX%\Library
set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib

if "%JOB%" == "Toolchain" (
@rem Install pre-built "toolchain" packages for faster builds
conda install -q -y -c conda-forge ^
--file=ci\conda_env_cpp.yml ^
python=%PYTHON%

set ARROW_BUILD_TOOLCHAIN=%CONDA_PREFIX%\Library
set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.yml
)

conda create -n arrow -q -y %CONDA_PACKAGES% -c conda-forge

call activate arrow

@rem Use Boost from Anaconda
set BOOST_ROOT=%CONDA_PREFIX%\Library
set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib

call ci\cpp-msvc-build-main.bat
4 changes: 3 additions & 1 deletion ci/cpp-msvc-build-main.bat
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ set ARROW_HOME=%CONDA_PREFIX%\Library
set CMAKE_ARGS=-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF

if "%JOB%" == "Toolchain" (
set CMAKE_ARGS=%CMAKE_ARGS% -DARROW_WITH_BZ2=ON
set CMAKE_ARGS=%CMAKE_ARGS% -DARROW_WITH_BZ2=ON
set ARROW_BUILD_TOOLCHAIN=%CONDA_PREFIX%\Library
)

@rem Retrieve git submodules, configure env var for Parquet unit tests
Expand Down Expand Up @@ -85,6 +86,7 @@ set PYARROW_BUNDLE_ARROW_CPP=ON
set PYARROW_BUNDLE_BOOST=OFF
set PYARROW_WITH_STATIC_BOOST=ON
set PYARROW_WITH_PARQUET=ON
set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA%
set PYARROW_PARALLEL=2

@rem ARROW-3075; pkgconfig is broken for Parquet for now
Expand Down
4 changes: 4 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,10 @@ Note that this requires linking Boost statically"
# Windows options

if (MSVC)
option(MSVC_LINK_VERBOSE
"Pass verbose linking options when linking libraries and executables"
OFF)

option(ARROW_USE_CLCACHE
"Use clcache if available"
ON)
Expand Down
6 changes: 4 additions & 2 deletions cpp/cmake_modules/FindClangTools.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ set(CLANG_TOOLS_SEARCH_PATHS
${ClangTools_PATH}
$ENV{CLANG_TOOLS_PATH}
/usr/local/bin /usr/bin
"C:/Program Files/LLVM/bin"
"${HOMEBREW_PREFIX}/bin")
"C:/Program Files/LLVM/bin" # Windows, non-conda
"$ENV{CONDA_PREFIX}/Library/bin" # Windows, conda
"${HOMEBREW_PREFIX}/bin"
)

find_program(CLANG_TIDY_BIN
NAMES clang-tidy-${ARROW_LLVM_VERSION}
Expand Down
23 changes: 16 additions & 7 deletions cpp/cmake_modules/FindGFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,18 @@ set(GFLAGS_STATIC_LIB_SUFFIX
set(GFLAGS_STATIC_LIB_NAME
${CMAKE_STATIC_LIBRARY_PREFIX}gflags${GFLAGS_STATIC_LIB_SUFFIX})

message(STATUS "GFLAGS_HOME: ${GFLAGS_HOME}")

if ( _gflags_roots )
find_path(GFLAGS_INCLUDE_DIR NAMES gflags/gflags.h
PATHS ${_gflags_roots}
NO_DEFAULT_PATH
PATH_SUFFIXES "include" )
find_library(GFLAGS_SHARED_LIB NAMES gflags
find_library(GFLAGS_STATIC_LIB NAMES ${GFLAGS_STATIC_LIB_NAME}
PATHS ${_gflags_roots}
NO_DEFAULT_PATH
PATH_SUFFIXES "lib" )
find_library(GFLAGS_STATIC_LIB NAMES ${GFLAGS_STATIC_LIB_NAME}
find_library(GFLAGS_SHARED_LIB NAMES gflags
PATHS ${_gflags_roots}
NO_DEFAULT_PATH
PATH_SUFFIXES "lib" )
Expand All @@ -57,14 +59,21 @@ else()
# make sure we don't accidentally pick up a different version
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)
find_library(GFLAGS_SHARED_LIB gflags
find_library(GFLAGS_STATIC_LIB ${GFLAGS_STATIC_LIB_NAME}
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)
find_library(GFLAGS_STATIC_LIB ${GFLAGS_STATIC_LIB_NAME}
find_library(GFLAGS_SHARED_LIB gflags
NO_CMAKE_SYSTEM_PATH
NO_SYSTEM_ENVIRONMENT_PATH)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GFLAGS REQUIRED_VARS
GFLAGS_SHARED_LIB GFLAGS_STATIC_LIB GFLAGS_INCLUDE_DIR)
if (GFLAGS_INCLUDE_DIR AND GFLAGS_STATIC_LIB)
set(GFLAGS_FOUND TRUE)
else ()
set(GFLAGS_FOUND FALSE)
endif ()

mark_as_advanced(
GFLAGS_INCLUDE_DIR
GFLAGS_STATIC_LIB
)
30 changes: 20 additions & 10 deletions cpp/cmake_modules/FindGandiva.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ else()
${GANDIVA_HOME}/include
)

set(GANDIVA_SEARCH_LIB_PATH
${GANDIVA_HOME}/lib
)
set(GANDIVA_SEARCH_LIB_PATH "${GANDIVA_HOME}")

find_path(GANDIVA_INCLUDE_DIR gandiva/expression_registry.h PATHS
${GANDIVA_SEARCH_HEADER_PATHS}
Expand All @@ -57,18 +55,30 @@ else()
endif()

find_library(GANDIVA_LIB_PATH NAMES gandiva
PATHS
${GANDIVA_SEARCH_LIB_PATH}
NO_DEFAULT_PATH)
PATHS ${GANDIVA_SEARCH_LIB_PATH}
NO_DEFAULT_PATH
PATH_SUFFIXES "lib")
get_filename_component(GANDIVA_LIBS ${GANDIVA_LIB_PATH} DIRECTORY)

if (GANDIVA_INCLUDE_DIR AND GANDIVA_LIBS)
find_library(GANDIVA_SHARED_LIB_PATH NAMES gandiva
PATHS ${GANDIVA_SEARCH_LIB_PATH}
NO_DEFAULT_PATH
PATH_SUFFIXES "bin" )

get_filename_component(GANDIVA_SHARED_LIBS ${GANDIVA_SHARED_LIB_PATH} PATH )

if (GANDIVA_INCLUDE_DIR AND GANDIVA_LIBS AND GANDIVA_SHARED_LIBS)
set(GANDIVA_FOUND TRUE)
set(GANDIVA_LIB_NAME gandiva)

set(GANDIVA_STATIC_LIB ${GANDIVA_LIBS}/lib${GANDIVA_LIB_NAME}.a)

set(GANDIVA_SHARED_LIB ${GANDIVA_LIBS}/lib${GANDIVA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
if (MSVC)
set(GANDIVA_STATIC_LIB "${GANDIVA_LIBS}/${GANDIVA_LIB_NAME}${GANDIVA_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(GANDIVA_SHARED_LIB "${GANDIVA_SHARED_LIBS}/${GANDIVA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(GANDIVA_SHARED_IMP_LIB "${GANDIVA_LIBS}/${GANDIVA_LIB_NAME}.lib")
else()
set(GANDIVA_STATIC_LIB ${GANDIVA_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
set(GANDIVA_SHARED_LIB ${GANDIVA_SHARED_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()

if (GANDIVA_FOUND)
Expand Down
7 changes: 6 additions & 1 deletion cpp/cmake_modules/FindParquet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ if(NOT "$ENV{PARQUET_HOME}" STREQUAL "")
set(PARQUET_HOME "$ENV{PARQUET_HOME}")
endif()

if(NOT "$ENV{ARROW_HOME}" STREQUAL "")
set(ARROW_HOME "$ENV{ARROW_HOME}")
endif()

if (MSVC)
SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll")

Expand All @@ -38,7 +42,8 @@ if (MSVC)
endif()

find_library(PARQUET_SHARED_LIBRARIES NAMES parquet
PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
PATHS ${PARQUET_HOME} ${ARROW_HOME}
NO_DEFAULT_PATH
PATH_SUFFIXES "bin" )

get_filename_component(PARQUET_SHARED_LIBS ${PARQUET_SHARED_LIBRARIES} PATH )
Expand Down
19 changes: 19 additions & 0 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -411,3 +411,22 @@ else()
endif()

message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")

# ----------------------------------------------------------------------
# MSVC-specific linker options

if (MSVC)
set(MSVC_LINKER_FLAGS)
if (MSVC_LINK_VERBOSE)
set(MSVC_LINKER_FLAGS "${MSVC_LINKER_FLAGS} /VERBOSE:LIB")
endif()
if (NOT ARROW_USE_STATIC_CRT)
set(MSVC_LINKER_FLAGS "${MSVC_LINKER_FLAGS} /NODEFAULTLIB:LIBCMT")
set(CMAKE_EXE_LINKER_FLAGS
"${CMAKE_EXE_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS
"${CMAKE_MODULE_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS
"${CMAKE_SHARED_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}")
endif()
endif()
6 changes: 6 additions & 0 deletions cpp/src/gandiva/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.

// TODO(wesm): LLVM 7 produces pesky C4244 that disable pragmas around the LLVM
// includes seem to not fix as with LLVM 6
#if defined(_MSC_VER)
#pragma warning(disable : 4244)
#endif

#include "gandiva/engine.h"

#include <iostream>
Expand Down
1 change: 1 addition & 0 deletions cpp/src/gandiva/expression_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class GANDIVA_EXPORT ExpressionRegistry {
std::unique_ptr<FunctionRegistry> function_registry_;
};

GANDIVA_EXPORT
std::vector<std::shared_ptr<FunctionSignature>> GetRegisteredFunctionSignatures();

} // namespace gandiva
Expand Down
16 changes: 16 additions & 0 deletions cpp/src/gandiva/precompiled/time_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,27 @@ TEST(TestTime, TestCastTimestamp) {
-1187308799080);
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1857-02-11 20:31:40.920 -05:30", 30),
-3562264699080);
}

#ifndef _WIN32

// TODO(wesm): ARROW-4495. Need to address TZ database issues on Windows

TEST(TestTime, TestCastTimestampWithTZ) {
ExecutionContext context;
int64_t context_ptr = reinterpret_cast<int64_t>(&context);

EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920 Canada/Pacific", 37),
969727530920);
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2012-02-28 23:30:59 Asia/Kolkata", 32),
1330452059000);
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1923-10-07 03:03:03 America/New_York", 36),
-1459094217000);
}

TEST(TestTime, TestCastTimestampErrors) {
ExecutionContext context;
int64_t context_ptr = reinterpret_cast<int64_t>(&context);

// error cases
EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "20000923", 8), 0);
Expand All @@ -93,6 +107,8 @@ TEST(TestTime, TestCastTimestamp) {
context.Reset();
}

#endif

TEST(TestTime, TestExtractTime) {
// 10:20:33
int32 time_as_millis_in_day = 37233000;
Expand Down
2 changes: 2 additions & 0 deletions docs/source/python/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ Now, we build and install Arrow C++ libraries
cmake --build . --target INSTALL --config Release
cd ..\..
If building with LLVM, also add `-DARROW_GANDIVA=ON`.

After that, we must put the install directory's bin path in our ``%PATH%``:

.. code-block:: shell
Expand Down
15 changes: 13 additions & 2 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -517,12 +517,23 @@ if (PYARROW_BUILD_GANDIVA)
bundle_arrow_lib(GANDIVA_SHARED_LIB
ABI_VERSION ${ARROW_ABI_VERSION}
SO_VERSION ${ARROW_SO_VERSION})

if (MSVC)
bundle_arrow_implib(GANDIVA_SHARED_IMP_LIB)
endif()
endif()

if (MSVC)
ADD_THIRDPARTY_LIB(gandiva
SHARED_LIB ${GANDIVA_SHARED_IMP_LIB})
else()
ADD_THIRDPARTY_LIB(gandiva
SHARED_LIB ${GANDIVA_SHARED_LIB})
endif()

set(LINK_LIBS
${LINK_LIBS}
${GANDIVA_SHARED_LIB})

gandiva_shared)
set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} gandiva)
endif()

Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/gandiva.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from libcpp cimport bool as c_bool, nullptr
from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
Expand Down
Loading

0 comments on commit 29f76df

Please sign in to comment.