Skip to content

Commit

Permalink
ARROW-42: Add Python tests to Travis CI build
Browse files Browse the repository at this point in the history
Author: Wes McKinney <wesm@apache.org>

Closes #22 from wesm/ARROW-42 and squashes the following commits:

3b056a1 [Wes McKinney] Modularize Travis CI build and add Python build script. Remove parquet.pyx from Cython build for now, suppress -Wunused-variable in Cython compilation. Add missing formatting.py file
  • Loading branch information
wesm committed Mar 9, 2016
1 parent e822ea7 commit 8367527
Show file tree
Hide file tree
Showing 19 changed files with 228 additions and 75 deletions.
23 changes: 23 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ addons:
packages:
- gcc-4.9 # Needed for C++11
- g++-4.9 # Needed for C++11
- gdb
- gcov
- ccache
- cmake
- valgrind

Expand All @@ -17,11 +19,32 @@ matrix:
- compiler: gcc
language: cpp
os: linux
before_script:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh
- compiler: clang
language: cpp
os: osx
addons:
before_script:
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh

before_install:
- ulimit -c unlimited -S
- export CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
- export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
- export LD_LIBRARY_PATH=$ARROW_CPP_INSTALL/lib:$LD_LIBRARY_PATH

after_script:
- rm -rf $CPP_BUILD_DIR

after_failure:
- COREFILE=$(find . -maxdepth 2 -name "core*" | head -n 1)
- if [[ -f "$COREFILE" ]]; then gdb -c "$COREFILE" example -ex "thread apply all bt" -ex "set pagination 0" -batch; fi
26 changes: 26 additions & 0 deletions ci/travis_before_script_cpp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

# Travis CI "before_script" step for the C++ library: configure, build and
# install Arrow C++ into a scratch build directory and install prefix.
#
# Environment:
#   TRAVIS_BUILD_DIR   root of the repository checkout
#   CPP_BUILD_DIR      out-of-source build directory
#                      (defaults to $TRAVIS_BUILD_DIR/cpp-build)
#   ARROW_CPP_INSTALL  CMake install prefix
#                      (defaults to $TRAVIS_BUILD_DIR/cpp-install)

# Abort on the first failing command.
set -e

# Only assign the default when the caller has not already set the variable.
: "${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}"

# -p keeps the script from aborting when the directory already exists.
mkdir -p "$CPP_BUILD_DIR"
pushd "$CPP_BUILD_DIR"

CPP_DIR="$TRAVIS_BUILD_DIR/cpp"

# Build an isolated thirdparty
cp -r "$CPP_DIR/thirdparty" .
cp "$CPP_DIR/setup_build_env.sh" .

# Sourced (not executed) so that whatever it exports stays in this shell.
source setup_build_env.sh

# NOTE(review): GTEST_HOME is presumably exported by setup_build_env.sh;
# printed here only as a diagnostic.
echo "$GTEST_HOME"

: "${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}"

# Warnings are promoted to errors for the CI build.
cmake -DCMAKE_INSTALL_PREFIX="$ARROW_CPP_INSTALL" -DCMAKE_CXX_FLAGS="-Werror" "$CPP_DIR"
make -j4
make install

popd
22 changes: 2 additions & 20 deletions ci/travis_script_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,11 @@

set -e

mkdir $TRAVIS_BUILD_DIR/cpp-build
pushd $TRAVIS_BUILD_DIR/cpp-build
: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}

CPP_DIR=$TRAVIS_BUILD_DIR/cpp
pushd $CPP_BUILD_DIR

# Build an isolated thirdparty
cp -r $CPP_DIR/thirdparty .
cp $CPP_DIR/setup_build_env.sh .

if [ $TRAVIS_OS_NAME == "linux" ]; then
# Use a C++11 compiler on Linux
export CC="gcc-4.9"
export CXX="g++-4.9"
fi

source setup_build_env.sh

echo $GTEST_HOME

cmake -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
make lint
make -j4

if [ $TRAVIS_OS_NAME == "linux" ]; then
valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest
Expand All @@ -32,4 +15,3 @@ else
fi

popd
rm -rf cpp-build
59 changes: 59 additions & 0 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

# Travis CI "script" step for the Python bindings: bootstrap a Miniconda
# environment, install the Python dependencies, build the extensions in
# place against the installed Arrow C++ library and run the test suite.
#
# Environment:
#   TRAVIS_BUILD_DIR   root of the repository checkout
#   TRAVIS_OS_NAME     "linux" selects the Linux installer, anything else
#                      selects the Mac OS X installer
#   CPP_BUILD_DIR      C++ build directory holding setup_build_env.sh
#                      (defaults to $TRAVIS_BUILD_DIR/cpp-build)
#   ARROW_CPP_INSTALL  Arrow C++ install prefix, exported as ARROW_HOME
#                      (defaults to $TRAVIS_BUILD_DIR/cpp-install)

# Abort on the first failing command.
set -e

# Same defaults as the C++ CI scripts, so this script also works when the
# variables were not exported by the caller.
: "${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}"
: "${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}"

PYTHON_DIR="$TRAVIS_BUILD_DIR/python"

# Share environment with C++
pushd "$CPP_BUILD_DIR"
source setup_build_env.sh
popd

pushd "$PYTHON_DIR"

# Bootstrap a Conda Python environment

if [ "$TRAVIS_OS_NAME" == "linux" ]; then
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
else
  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
fi

# -f: fail on HTTP errors instead of saving the error page as the installer;
# -L: follow redirects.
curl -fL -o miniconda.sh "$MINICONDA_URL"
MINICONDA="$TRAVIS_BUILD_DIR/miniconda"
bash miniconda.sh -b -p "$MINICONDA"
export PATH="$MINICONDA/bin:$PATH"
conda update -y -q conda
conda info -a

PYTHON_VERSION=3.5
CONDA_ENV_NAME=pyarrow-test

conda create -y -q -n "$CONDA_ENV_NAME" python="$PYTHON_VERSION"
source activate "$CONDA_ENV_NAME"

# Log which interpreter the remaining steps will use.
python --version
which python

# faster builds, please
conda install -y nomkl

# Expensive dependencies install from Continuum package repo
conda install -y pip numpy pandas cython

# Other stuff pip install
pip install -r requirements.txt

export ARROW_HOME="$ARROW_CPP_INSTALL"

python setup.py build_ext --inplace

py.test -vv -r sxX arrow

# if [ $TRAVIS_OS_NAME == "linux" ]; then
#   valgrind --tool=memcheck py.test -vv -r sxX arrow
# else
#   py.test -vv -r sxX arrow
# fi

popd
2 changes: 2 additions & 0 deletions cpp/src/arrow/table/column-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ using std::vector;

namespace arrow {

const auto INT32 = std::make_shared<Int32Type>();

class TestColumn : public TestBase {
protected:
std::shared_ptr<ChunkedArray> data_;
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/table/schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ using std::vector;

namespace arrow {

const auto INT32 = std::make_shared<Int32Type>();

TEST(TestField, Basics) {
shared_ptr<DataType> ftype = INT32;
shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/table/table-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ using std::vector;

namespace arrow {

const auto INT16 = std::make_shared<Int16Type>();
const auto UINT8 = std::make_shared<UInt8Type>();
const auto INT32 = std::make_shared<Int32Type>();

class TestTable : public TestBase {
public:
void MakeExample1(int length) {
Expand Down
14 changes: 0 additions & 14 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,4 @@ std::string StructType::ToString() const {
return s.str();
}

const std::shared_ptr<NullType> NA = std::make_shared<NullType>();
const std::shared_ptr<BooleanType> BOOL = std::make_shared<BooleanType>();
const std::shared_ptr<UInt8Type> UINT8 = std::make_shared<UInt8Type>();
const std::shared_ptr<UInt16Type> UINT16 = std::make_shared<UInt16Type>();
const std::shared_ptr<UInt32Type> UINT32 = std::make_shared<UInt32Type>();
const std::shared_ptr<UInt64Type> UINT64 = std::make_shared<UInt64Type>();
const std::shared_ptr<Int8Type> INT8 = std::make_shared<Int8Type>();
const std::shared_ptr<Int16Type> INT16 = std::make_shared<Int16Type>();
const std::shared_ptr<Int32Type> INT32 = std::make_shared<Int32Type>();
const std::shared_ptr<Int64Type> INT64 = std::make_shared<Int64Type>();
const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();

} // namespace arrow
14 changes: 0 additions & 14 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,20 +338,6 @@ struct StructType : public DataType {
std::string ToString() const override;
};

extern const std::shared_ptr<NullType> NA;
extern const std::shared_ptr<BooleanType> BOOL;
extern const std::shared_ptr<UInt8Type> UINT8;
extern const std::shared_ptr<UInt16Type> UINT16;
extern const std::shared_ptr<UInt32Type> UINT32;
extern const std::shared_ptr<UInt64Type> UINT64;
extern const std::shared_ptr<Int8Type> INT8;
extern const std::shared_ptr<Int16Type> INT16;
extern const std::shared_ptr<Int32Type> INT32;
extern const std::shared_ptr<Int64Type> INT64;
extern const std::shared_ptr<FloatType> FLOAT;
extern const std::shared_ptr<DoubleType> DOUBLE;
extern const std::shared_ptr<StringType> STRING;

} // namespace arrow

#endif // ARROW_TYPE_H
2 changes: 0 additions & 2 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@ set(PYARROW_SRCS
)

set(LINK_LIBS
pyarrow_util
arrow
)

Expand All @@ -428,7 +427,6 @@ set(CYTHON_EXTENSIONS
array
config
error
parquet
scalar
schema
)
Expand Down
56 changes: 56 additions & 0 deletions python/arrow/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Pretty-printing and other formatting utilities for Arrow data structures

import arrow.scalar as scalar


def array_format(arr, window=None):
    """Format ``arr`` as a bracketed block with one element per line.

    Each element is rendered with ``value_format`` and indented by two
    spaces between an opening ``[`` line and a closing ``]`` line.

    Parameters
    ----------
    arr : sequence supporting iteration, ``len()`` and integer indexing
    window : int, optional
        When ``None``, or when ``2 * window >= len(arr)``, every element
        is shown. Otherwise only the first and last ``window`` elements
        are shown, separated by a ``...`` line.

    Returns
    -------
    str
    """
    if window is None or window * 2 >= len(arr):
        # No elision: every element, comma-separated.
        rendered = [value_format(item, 0) for item in arr]
        body = _indent(',\n'.join(rendered), 2)
    else:
        size = len(arr)
        final = size - 1
        # Leading elements, each followed by a separator comma.
        rendered = [value_format(arr[pos], 0) + ',' for pos in range(window)]
        rendered.append('...')
        # Trailing elements; only the very last one has no trailing comma.
        rendered.extend(
            value_format(arr[pos], 0) + (',' if pos < final else '')
            for pos in range(size - window, size))
        body = _indent('\n'.join(rendered), 2)

    return '[\n{0}\n]'.format(body)


def value_format(x, indent_level=0):
    """Return a printable string for a single value.

    ``scalar.ListValue`` instances are rendered recursively as a bracketed,
    comma-separated list with one item per line; every other value is
    rendered with ``repr``.

    ``indent_level`` is accepted but not read by this function.
    """
    if not isinstance(x, scalar.ListValue):
        return repr(x)

    pieces = [value_format(item) for item in x]
    joined = ',\n'.join(pieces)
    # Indent every line by one space, then drop the outer whitespace so the
    # first item sits directly after the opening bracket.
    return '[{0}]'.format(_indent(joined, 1).strip())


def _indent(text, spaces):
if spaces == 0:
return text
block = ' ' * spaces
return '\n'.join(block + x for x in text.split('\n'))
5 changes: 5 additions & 0 deletions python/cmake_modules/UseCython.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ function( compile_pyx _name pyx_target_name generated_files pyx_file)
set( _generated_files "${_name}.${extension}")
endif()
set_source_files_properties( ${_generated_files} PROPERTIES GENERATED TRUE )

# Cython creates a lot of compiler warning detritus on clang
set_source_files_properties(${_generated_files} PROPERTIES
COMPILE_FLAGS -Wno-unused-function)

set( ${generated_files} ${_generated_files} PARENT_SCOPE )

# Add the command to run the compiler.
Expand Down
4 changes: 4 additions & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pytest
numpy>=1.7.0
pandas>=0.12.0
six
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def get_ext_built(self, name):
return name + suffix

def get_cmake_cython_names(self):
return ['array', 'config', 'error', 'parquet', 'scalar', 'schema']
return ['array', 'config', 'error', 'scalar', 'schema']

def get_names(self):
return self._found_names
Expand Down
20 changes: 13 additions & 7 deletions python/src/pyarrow/adapters/builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <arrow/api.h>

#include "pyarrow/helpers.h"
#include "pyarrow/status.h"

using arrow::ArrayBuilder;
Expand Down Expand Up @@ -74,16 +75,16 @@ class ScalarVisitor {
std::shared_ptr<DataType> GetType() {
// TODO(wesm): handling mixed-type cases
if (float_count_) {
return arrow::DOUBLE;
return DOUBLE;
} else if (int_count_) {
// TODO(wesm): tighter type later
return arrow::INT64;
return INT64;
} else if (bool_count_) {
return arrow::BOOL;
return BOOL;
} else if (string_count_) {
return arrow::STRING;
return STRING;
} else {
return arrow::NA;
return NA;
}
}

Expand Down Expand Up @@ -145,7 +146,7 @@ class SeqVisitor {
std::shared_ptr<DataType> GetType() {
if (scalars_.total_count() == 0) {
if (max_nesting_level_ == 0) {
return arrow::NA;
return NA;
} else {
return nullptr;
}
Expand Down Expand Up @@ -209,14 +210,19 @@ static Status InferArrowType(PyObject* obj, int64_t* size,

// For 0-length sequences, refuse to guess
if (*size == 0) {
*out_type = arrow::NA;
*out_type = NA;
}

SeqVisitor seq_visitor;
PY_RETURN_NOT_OK(seq_visitor.Visit(obj));
PY_RETURN_NOT_OK(seq_visitor.Validate());

*out_type = seq_visitor.GetType();

if (*out_type == nullptr) {
return Status::TypeError("Unable to determine data type");
}

return Status::OK();
}

Expand Down
Loading

0 comments on commit 8367527

Please sign in to comment.