diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat index 2a537769f82fc..04fe2ab62cbd4 100644 --- a/ci/msvc-build.bat +++ b/ci/msvc-build.bat @@ -114,6 +114,9 @@ popd @rem see PARQUET-1018 pushd python + +set PYARROW_CXXFLAGS=/WX python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp bdist_wheel || exit /B py.test pyarrow -v -s --parquet || exit /B + popd diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 907bc60cd719e..9135aaf38e4e7 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -23,6 +23,7 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh export ARROW_HOME=$ARROW_CPP_INSTALL export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib:$LD_LIBRARY_PATH +export PYARROW_CXXFLAGS="-Werror" build_parquet_cpp() { export PARQUET_ARROW_VERSION=$(git rev-parse HEAD) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 846e4dd5f6eaf..bfae157ed6b9c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -57,6 +57,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) + set(PYARROW_CXXFLAGS "" CACHE STRING + "Compiler flags to append when compiling Arrow") endif() find_program(CCACHE_FOUND ccache) @@ -75,6 +77,7 @@ include(CompilerInfo) # Add common flags set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") if (NOT MSVC) # Enable perf and other tools to work properly @@ -82,6 +85,13 @@ if (NOT MSVC) # Suppress Cython warnings set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") +else() + # MSVC version of -Wno-return-type-c-linkage + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190") + + # Cython generates some bitshift expressions that MSVC does not like in + # __Pyx_PyFloat_DivideObjC + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293") endif() if ("${COMPILER_FAMILY}" STREQUAL "clang") @@ -95,6 +105,7 @@ if ("${COMPILER_FAMILY}" STREQUAL "clang") # Cython warnings in clang set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations") # We have public Cython APIs which return C++ types, which are in an extern # "C" blog (no symbol mangling) and clang doesn't like this diff --git a/python/doc/source/development.rst b/python/doc/source/development.rst index 55b3efdad17c6..d0a1c544dd091 100644 --- a/python/doc/source/development.rst +++ b/python/doc/source/development.rst @@ -267,7 +267,6 @@ Now, we build and install Arrow C++ libraries -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ -DCMAKE_BUILD_TYPE=Release ^ -DARROW_BUILD_TESTS=off ^ - -DARROW_ZLIB_VENDORED=off ^ -DARROW_PYTHON=on .. cmake --build . --target INSTALL --config Release cd ..\.. diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index aef661818f401..c940122da5dcf 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -636,7 +636,7 @@ cdef class ParquetWriter: elif row_group_size == 0: raise ValueError('Row group size cannot be 0') - cdef int c_row_group_size = row_group_size + cdef int64_t c_row_group_size = row_group_size with nogil: check_status(self.writer.get() diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index ffe867b0af0f5..db6770f586bd5 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -369,7 +369,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CTable]* table) int num_columns() - int num_rows() + int64_t num_rows() c_bool Equals(const CTable& other) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 9e4e9078ceb35..01c987d286f92 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -923,7 +923,7 @@ cdef class _HdfsClient: cdef c_string c_path = tobytes(path) with nogil: check_status(self.client.get() - .Delete(c_path, recursive)) + .Delete(c_path, recursive == 1)) def open(self, path, mode='rb', buffer_size=None, replication=None, default_block_size=None): diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index dec5341ca4ad1..1f72070cb7e12 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -169,7 +169,6 @@ cdef class Time64Value(ArrayValue): CTime64Type* dtype = ap.type().get() cdef int64_t val = ap.Value(self.index) - print(val) if dtype.unit() == TimeUnit_MICRO: return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=val)).time() diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index c1d5a50d487a3..6277761b7d6ec 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -286,7 +286,7 @@ cdef int _schema_from_arrays( c_string c_name vector[shared_ptr[CField]] fields shared_ptr[CDataType] type_ - int K = len(arrays) + Py_ssize_t K = len(arrays) fields.resize(K) @@ -733,7 +733,7 @@ cdef class Table: vector[shared_ptr[CColumn]] columns shared_ptr[CSchema] schema shared_ptr[CTable] table - size_t K = len(arrays) + int i, K = len(arrays) _schema_from_arrays(arrays, names, metadata, &schema) @@ -841,7 +841,7 @@ cdef class Table: self._check_nullptr() return pyarrow_wrap_schema(self.table.schema()) - def column(self, int64_t i): + def column(self, int i): """ Select a column by its numeric index. @@ -855,8 +855,8 @@ cdef class Table: """ cdef: Column column = Column() - int64_t num_columns = self.num_columns - int64_t index + int num_columns = self.num_columns + int index self._check_nullptr() if not -num_columns <= i < num_columns: diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index a8d7aa0ee8122..fefde55bc2f95 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -281,12 +281,12 @@ cdef class Schema: def __len__(self): return self.schema.num_fields() - def __getitem__(self, int64_t i): + def __getitem__(self, int i): cdef: Field result = Field() - int64_t num_fields = self.schema.num_fields() - int64_t index + int num_fields = self.schema.num_fields() + int index if not -num_fields <= i < num_fields: raise IndexError( @@ -456,7 +456,7 @@ def field(name, DataType type, bint nullable=True, dict metadata=None): convert_metadata(metadata, &c_meta) result.sp_field.reset(new CField(tobytes(name), type.sp_type, - nullable, c_meta)) + nullable == 1, c_meta)) result.field = result.sp_field.get() result.type = type return result diff --git a/python/setup.py b/python/setup.py index 801cd17f440ec..ebf28cc64e990 100644 --- a/python/setup.py +++ b/python/setup.py @@ -92,6 +92,8 @@ def initialize_options(self): self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '') self.build_type = os.environ.get('PYARROW_BUILD_TYPE', 'debug').lower() + self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '') + if sys.platform == 'win32': # Cannot do debug builds in Windows unless Python itself is a debug # build @@ -146,6 +148,10 @@ def _run_cmake(self): if self.with_plasma: cmake_options.append('-DPYARROW_BUILD_PLASMA=on') + if len(self.cmake_cxxflags) > 0: + cmake_options.append('-DPYARROW_CXXFLAGS="{0}"' + .format(self.cmake_cxxflags)) + if self.bundle_arrow_cpp: cmake_options.append('-DPYARROW_BUNDLE_ARROW_CPP=ON') # ARROW-1090: work around CMake rough edges