Skip to content

Commit

Permalink
ARROW-203: Python: Basic filename based Parquet read/write
Browse files Browse the repository at this point in the history
Author: Uwe L. Korn <uwelk@xhochy.com>

Closes #83 from xhochy/arrow-203 and squashes the following commits:

405f85d [Uwe L. Korn] Remove FindParquet duplication
38d786c [Uwe L. Korn] Make code more readable by using using
ec07768 [Uwe L. Korn] Set LD_LIBRARY_PATH in python build
8d90d3f [Uwe L. Korn] Do not set LD_LIBRARY_PATH in python build
000e1e3 [Uwe L. Korn] Use unique_ptr and shared_ptr from Cython
8f6010a [Uwe L. Korn] Linter fixes
0514d01 [Uwe L. Korn] Handle exceptions on RowGroupWriter::Close better
77bd21a [Uwe L. Korn] Add pandas roundtrip to tests
f583b61 [Uwe L. Korn] Fix rpath for libarrow_parquet
00c1461 [Uwe L. Korn] Also ensure correct OSX compiler flags in PyArrow
4a80116 [Uwe L. Korn] Handle Python3 strings correctly
066c08a [Uwe L. Korn] Add missing functions to smart pointers
5706db2 [Uwe L. Korn] Use length and offset instead of slicing
443de8b [Uwe L. Korn] Add miniconda to the LD_LIBRARY_PATH
2dffc14 [Uwe L. Korn] Fix min mistake, use equals instead of ==
2006e70 [Uwe L. Korn] Rewrite test py.test style
9520c39 [Uwe L. Korn] Use PARQUET from miniconda path
cd3b9a9 [Uwe L. Korn] Also search for Parquet in PyArrow
6a41d23 [Uwe L. Korn] Re-use conda installation from C++
81f501e [Uwe L. Korn] No need to install conda in travis_script_python anymore
b505feb [Uwe L. Korn] Install parquet-cpp via conda
5d4929a [Uwe L. Korn] Add test-util.h
9b06e41 [Uwe L. Korn] Make tests templated
be6415c [Uwe L. Korn] Incorporate review comments
0fbed3f [Uwe L. Korn] Remove obsolete parquet files
081db5f [Uwe L. Korn] Limit and document chunk_size
7192cfb [Uwe L. Korn] Add const to slicing parameters
0463995 [Uwe L. Korn] ARROW-203: Python: Basic filename based Parquet read/write
  • Loading branch information
xhochy authored and wesm committed Jun 10, 2016
1 parent 8197f24 commit ec66ddd
Show file tree
Hide file tree
Showing 27 changed files with 654 additions and 144 deletions.
6 changes: 5 additions & 1 deletion ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

set -e

source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
conda install -y --channel apache/channel/dev parquet-cpp
export PARQUET_HOME=$MINICONDA

: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}

mkdir $CPP_BUILD_DIR
Expand All @@ -19,7 +23,7 @@ echo $GTEST_HOME

: ${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}

CMAKE_COMMON_FLAGS="-DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
CMAKE_COMMON_FLAGS="-DARROW_BUILD_BENCHMARKS=ON -DARROW_PARQUET=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"

if [ $TRAVIS_OS_NAME == "linux" ]; then
cmake -DARROW_TEST_MEMCHECK=on $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
Expand Down
22 changes: 1 addition & 21 deletions ci/travis_conda_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,7 @@

set -e

if [ $TRAVIS_OS_NAME == "linux" ]; then
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
else
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
fi

wget -O miniconda.sh $MINICONDA_URL
MINICONDA=$TRAVIS_BUILD_DIR/miniconda
bash miniconda.sh -b -p $MINICONDA
export PATH="$MINICONDA/bin:$PATH"
conda update -y -q conda
conda info -a

conda config --set show_channel_urls yes
conda config --add channels conda-forge
conda config --add channels apache

conda install --yes conda-build jinja2 anaconda-client

# faster builds, please
conda install -y nomkl
source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh

# Build libarrow

Expand Down
26 changes: 26 additions & 0 deletions ci/travis_install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

# Downloads Miniconda, installs it into $TRAVIS_BUILD_DIR/miniconda and
# configures the conda channels and packages used by the CI build.
#
# This script is meant to be sourced by the other CI scripts so that the
# exported MINICONDA and PATH variables remain visible to the caller.
#
# Reads:   TRAVIS_OS_NAME, TRAVIS_BUILD_DIR
# Exports: MINICONDA, PATH

set -e

# Select the installer for the CI host. The variable is quoted so that an
# empty or unset TRAVIS_OS_NAME is compared as an empty string instead of
# producing a "unary operator expected" error from `[`.
if [ "$TRAVIS_OS_NAME" = "linux" ]; then
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
else
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
fi

# Paths are quoted so a build directory containing whitespace is not split
# into several arguments.
wget -O miniconda.sh "$MINICONDA_URL"
export MINICONDA="$TRAVIS_BUILD_DIR/miniconda"
bash miniconda.sh -b -p "$MINICONDA"
export PATH="$MINICONDA/bin:$PATH"
conda update -y -q conda
conda info -a

conda config --set show_channel_urls yes
conda config --add channels conda-forge
conda config --add channels apache

conda install --yes conda-build jinja2 anaconda-client

# faster builds, please
conda install -y nomkl

21 changes: 6 additions & 15 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,19 @@ set -e

PYTHON_DIR=$TRAVIS_BUILD_DIR/python

# Re-use conda installation from C++
export MINICONDA=$TRAVIS_BUILD_DIR/miniconda
export PATH="$MINICONDA/bin:$PATH"
export LD_LIBRARY_PATH="$MINICONDA/lib:$LD_LIBRARY_PATH"
export PARQUET_HOME=$MINICONDA

# Share environment with C++
pushd $CPP_BUILD_DIR
source setup_build_env.sh
popd

pushd $PYTHON_DIR

# Bootstrap a Conda Python environment

if [ $TRAVIS_OS_NAME == "linux" ]; then
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
else
MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
fi

curl $MINICONDA_URL > miniconda.sh
MINICONDA=$TRAVIS_BUILD_DIR/miniconda
bash miniconda.sh -b -p $MINICONDA
export PATH="$MINICONDA/bin:$PATH"
conda update -y -q conda
conda info -a

python_version_tests() {
PYTHON_VERSION=$1
CONDA_ENV_NAME="pyarrow-test-${PYTHON_VERSION}"
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class Column {

int64_t null_count() const { return data_->null_count(); }

// @returns: a reference to the shared pointer held in field_
const std::shared_ptr<Field>& field() const { return field_; }

// @returns: the column's name in the passed metadata
const std::string& name() const { return field_->name; }

Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ add_library(arrow_parquet SHARED
# Link arrow_parquet against the libraries listed in PARQUET_LIBS and make
# sure the C++ linker driver is used for the final link step.
target_link_libraries(arrow_parquet ${PARQUET_LIBS})
set_property(TARGET arrow_parquet PROPERTY LINKER_LANGUAGE CXX)

# On Apple platforms, use "@rpath" as the install-name directory and apply the
# install-time rpath settings to the build-tree binary as well.
if(APPLE)
  set_property(TARGET arrow_parquet PROPERTY BUILD_WITH_INSTALL_RPATH ON)
  set_property(TARGET arrow_parquet PROPERTY INSTALL_NAME_DIR "@rpath")
endif()

ADD_ARROW_TEST(parquet-schema-test)
ARROW_TEST_LINK_LIBRARIES(parquet-schema-test arrow_parquet)

Expand Down
Loading

0 comments on commit ec66ddd

Please sign in to comment.