Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-323: [Python] Opt-in to pyarrow.parquet extension rather than attempting and failing silently #194

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PYARROW_BUILD_TESTS
"Build the PyArrow C++ googletest unit tests"
OFF)
option(PYARROW_BUILD_PARQUET
"Build the PyArrow Parquet integration"
OFF)
endif()

find_program(CCACHE_FOUND ccache)
Expand Down Expand Up @@ -445,7 +448,10 @@ set(LINK_LIBS
arrow_ipc
)

if(PARQUET_FOUND AND PARQUET_ARROW_FOUND)
if (PYARROW_BUILD_PARQUET)
if(NOT (PARQUET_FOUND AND PARQUET_ARROW_FOUND))
message(FATAL_ERROR "Unable to locate Parquet libraries")
endif()
ADD_THIRDPARTY_LIB(parquet_arrow
SHARED_LIB ${PARQUET_ARROW_SHARED_LIB})
set(LINK_LIBS
Expand Down
14 changes: 13 additions & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ python setup.py build_ext --inplace
py.test pyarrow
```

To change the build type, use the `--build-type` option:
To change the build type, use the `--build-type` option or set
`$PYARROW_BUILD_TYPE`:

```bash
python setup.py build_ext --build-type=release --inplace
Expand All @@ -57,9 +58,20 @@ python setup.py build_ext --build-type=release --inplace
To pass through other build options to CMake, set the environment variable
`$PYARROW_CMAKE_OPTIONS`.

#### Build the pyarrow Parquet file extension

To build the integration with [parquet-cpp][1], pass `--with-parquet` to the `build_ext` command of
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will only work on the build_ext command

setup.py or add `-DPYARROW_BUILD_PARQUET=on` to the general CMake options.

```bash
export PYARROW_CMAKE_OPTIONS=-DPYARROW_BUILD_PARQUET=on
```

#### Build the documentation

```bash
pip install -r doc/requirements.txt
python setup.py build_sphinx
```

[1]: https://github.com/apache/parquet-cpp
38 changes: 24 additions & 14 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def run(self):

description = "Build the C-extensions for arrow"
user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
('build-type=', None, 'build type (debug or release)')]
+ _build_ext.user_options)
('build-type=', None, 'build type (debug or release)'),
('with-parquet', None, 'build the Parquet extension')] +
_build_ext.user_options)

def initialize_options(self):
    """Set the defaults for the options this command adds.

    After the base-class defaults are initialised:

    * ``with_parquet`` starts out as ``False``.
    * ``build_type`` is read from ``$PYARROW_BUILD_TYPE`` (falling back to
      ``'debug'``) and lower-cased.
    * ``extra_cmake_args`` is read from ``$PYARROW_CMAKE_OPTIONS`` (falling
      back to the empty string).
    """
    _build_ext.initialize_options(self)

    environ = os.environ
    self.with_parquet = False
    self.build_type = environ.get('PYARROW_BUILD_TYPE', 'debug').lower()
    self.extra_cmake_args = environ.get('PYARROW_CMAKE_OPTIONS', '')

CYTHON_MODULE_NAMES = [
'array',
Expand All @@ -116,8 +118,6 @@ def initialize_options(self):
'schema',
'table']

CYTHON_ALLOWED_FAILURES = ['parquet']

def _run_cmake(self):
# The directory containing this setup.py
source = osp.dirname(osp.abspath(__file__))
Expand All @@ -141,17 +141,24 @@ def _run_cmake(self):
if (cachedir != build_temp):
return

pyexe_option = '-DPYTHON_EXECUTABLE=%s' % sys.executable
static_lib_option = ''
build_tests_option = ''

build_type_option = '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type)
cmake_options = [
'-DPYTHON_EXECUTABLE=%s' % sys.executable,
static_lib_option,
build_tests_option,
]

if self.with_parquet:
cmake_options.append('-DPYARROW_BUILD_PARQUET=on')

if sys.platform != 'win32':
cmake_command = ['cmake', self.extra_cmake_args, pyexe_option,
build_tests_option,
build_type_option,
static_lib_option, source]
cmake_options.append('-DCMAKE_BUILD_TYPE={0}'
.format(self.build_type))

cmake_command = (['cmake', self.extra_cmake_args] +
cmake_options + [source])

self.spawn(cmake_command)
args = ['make', 'VERBOSE=1']
Expand All @@ -166,10 +173,8 @@ def _run_cmake(self):
# Generate the build files
extra_cmake_args = shlex.split(self.extra_cmake_args)
cmake_command = (['cmake'] + extra_cmake_args +
cmake_options +
[source,
pyexe_option,
static_lib_option,
build_tests_option,
'-G', cmake_generator])
if "-G" in self.extra_cmake_args:
cmake_command = cmake_command[:-2]
Expand Down Expand Up @@ -202,7 +207,7 @@ def _run_cmake(self):
built_path = self.get_ext_built(name)
if not os.path.exists(built_path):
print(built_path)
if name in self.CYTHON_ALLOWED_FAILURES:
if self._failure_permitted(name):
print('Cython module {0} failure permitted'.format(name))
continue
raise RuntimeError('libpyarrow C-extension failed to build:',
Expand All @@ -219,6 +224,11 @@ def _run_cmake(self):

os.chdir(saved_cwd)

def _failure_permitted(self, name):
    """Return True when a missing build product for `name` may be ignored.

    Only the 'parquet' module is ever tolerated, and only while the
    ``with_parquet`` option is off; every other module name yields False.
    """
    # Any module other than parquet must always have been built.
    if name != 'parquet':
        return False
    # Parquet may be absent only if the user did not ask for it.
    return not self.with_parquet

def _get_inplace_dir(self):
pass

Expand Down