Skip to content

Commit

Permalink
ARROW-323: [Python] Opt-in to pyarrow.parquet extension rather than a…
Browse files Browse the repository at this point in the history
…ttempting and failing silently

Added a couple of ways to do this: either via the `--with-parquet` command line option (preferred) or by passing an option through to CMake

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #194 from wesm/ARROW-323 and squashes the following commits:

07c05cc [Wes McKinney] Update readme to illustrate proper use of --with-parquet with build_ext
3bd9a8d [Wes McKinney] Add --with-parquet option to setup.py
374e254 [Wes McKinney] Add to README about building the parquet extension
cab55cb [Wes McKinney] Opt in to building the pyarrow.parquet extension, do not silently fail
  • Loading branch information
wesm committed Nov 3, 2016
1 parent 17c9ae7 commit 25e0106
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 16 deletions.
8 changes: 7 additions & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PYARROW_BUILD_TESTS
"Build the PyArrow C++ googletest unit tests"
OFF)
option(PYARROW_BUILD_PARQUET
"Build the PyArrow Parquet integration"
OFF)
endif()

find_program(CCACHE_FOUND ccache)
Expand Down Expand Up @@ -445,7 +448,10 @@ set(LINK_LIBS
arrow_ipc
)

if(PARQUET_FOUND AND PARQUET_ARROW_FOUND)
if (PYARROW_BUILD_PARQUET)
if(NOT (PARQUET_FOUND AND PARQUET_ARROW_FOUND))
message(FATAL_ERROR "Unable to locate Parquet libraries")
endif()
ADD_THIRDPARTY_LIB(parquet_arrow
SHARED_LIB ${PARQUET_ARROW_SHARED_LIB})
set(LINK_LIBS
Expand Down
20 changes: 19 additions & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ python setup.py build_ext --inplace
py.test pyarrow
```

To change the build type, use the `--build-type` option:
To change the build type, use the `--build-type` option or set
`$PYARROW_BUILD_TYPE`:

```bash
python setup.py build_ext --build-type=release --inplace
Expand All @@ -57,9 +58,26 @@ python setup.py build_ext --build-type=release --inplace
To pass through other build options to CMake, set the environment variable
`$PYARROW_CMAKE_OPTIONS`.

#### Build the pyarrow Parquet file extension

To build the integration with [parquet-cpp][1], pass `--with-parquet` to
the `build_ext` command in setup.py:

```
python setup.py build_ext --with-parquet install
```

Alternatively, add `-DPYARROW_BUILD_PARQUET=on` to the general CMake options:

```
export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
```

#### Build the documentation

```bash
pip install -r doc/requirements.txt
python setup.py build_sphinx
```

[1]: https://github.com/apache/parquet-cpp
38 changes: 24 additions & 14 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def run(self):

description = "Build the C-extensions for arrow"
user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
('build-type=', None, 'build type (debug or release)')]
+ _build_ext.user_options)
('build-type=', None, 'build type (debug or release)'),
('with-parquet', None, 'build the Parquet extension')] +
_build_ext.user_options)

def initialize_options(self):
    """Set the default values of the custom build_ext options.

    The base-class defaults are installed first. The CMake pass-through
    arguments and the build type are then seeded from the
    ``PYARROW_CMAKE_OPTIONS`` and ``PYARROW_BUILD_TYPE`` environment
    variables (falling back to ``''`` and ``'debug'``), and the Parquet
    extension is disabled until ``--with-parquet`` turns it on.
    """
    _build_ext.initialize_options(self)

    env = os.environ
    self.extra_cmake_args = env.get('PYARROW_CMAKE_OPTIONS', '')
    # The build type is normalised to lower case.
    self.build_type = env.get('PYARROW_BUILD_TYPE', 'debug').lower()

    # Building pyarrow.parquet is opt-in.
    self.with_parquet = False

CYTHON_MODULE_NAMES = [
'array',
Expand All @@ -116,8 +118,6 @@ def initialize_options(self):
'schema',
'table']

CYTHON_ALLOWED_FAILURES = ['parquet']

def _run_cmake(self):
# The directory containing this setup.py
source = osp.dirname(osp.abspath(__file__))
Expand All @@ -141,17 +141,24 @@ def _run_cmake(self):
if (cachedir != build_temp):
return

pyexe_option = '-DPYTHON_EXECUTABLE=%s' % sys.executable
static_lib_option = ''
build_tests_option = ''

build_type_option = '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type)
cmake_options = [
'-DPYTHON_EXECUTABLE=%s' % sys.executable,
static_lib_option,
build_tests_option,
]

if self.with_parquet:
cmake_options.append('-DPYARROW_BUILD_PARQUET=on')

if sys.platform != 'win32':
cmake_command = ['cmake', self.extra_cmake_args, pyexe_option,
build_tests_option,
build_type_option,
static_lib_option, source]
cmake_options.append('-DCMAKE_BUILD_TYPE={0}'
.format(self.build_type))

cmake_command = (['cmake', self.extra_cmake_args] +
cmake_options + [source])

self.spawn(cmake_command)
args = ['make', 'VERBOSE=1']
Expand All @@ -166,10 +173,8 @@ def _run_cmake(self):
# Generate the build files
extra_cmake_args = shlex.split(self.extra_cmake_args)
cmake_command = (['cmake'] + extra_cmake_args +
cmake_options +
[source,
pyexe_option,
static_lib_option,
build_tests_option,
'-G', cmake_generator])
if "-G" in self.extra_cmake_args:
cmake_command = cmake_command[:-2]
Expand Down Expand Up @@ -202,7 +207,7 @@ def _run_cmake(self):
built_path = self.get_ext_built(name)
if not os.path.exists(built_path):
print(built_path)
if name in self.CYTHON_ALLOWED_FAILURES:
if self._failure_permitted(name):
print('Cython module {0} failure permitted'.format(name))
continue
raise RuntimeError('libpyarrow C-extension failed to build:',
Expand All @@ -219,6 +224,11 @@ def _run_cmake(self):

os.chdir(saved_cwd)

def _failure_permitted(self, name):
    """Return True when a missing build artifact for *name* is tolerated.

    Only the ``'parquet'`` module may be absent, and only when the
    Parquet extension was not requested (``self.with_parquet`` is falsy).
    Every other module, and ``'parquet'`` itself once opted in, must build.
    """
    return name == 'parquet' and not self.with_parquet

def _get_inplace_dir(self):
pass

Expand Down

0 comments on commit 25e0106

Please sign in to comment.