diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b6708342c1104..c6d192ee0afdb 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -288,6 +288,7 @@ set(PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/python_test.cc ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc + ${PYARROW_CPP_SOURCE_DIR}/pyarrow_compute.cc ${PYARROW_CPP_SOURCE_DIR}/serialize.cc ${PYARROW_CPP_SOURCE_DIR}/udf.cc) set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/init.cc @@ -379,7 +380,6 @@ if(NOT PYARROW_CPP_LINK_LIBS) endif() add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) -file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_BINARY_DIR}/**/*") target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src) diff --git a/python/examples/unwrapping/README.md b/python/examples/unwrapping/README.md new file mode 100644 index 0000000000000..439fa5e5306a9 --- /dev/null +++ b/python/examples/unwrapping/README.md @@ -0,0 +1,14 @@ +# arrow_wrap_unwrap + +This is some arbitrary code to demonstrate [wrapping and +unwrapping](https://arrow.apache.org/docs/python/integration/extending.html) +functionality. This tooling allows you to interop pyarrow objects with Cython or Arrow +C++. 
+
+Instructions:
+
+```
+python setup.py build_ext --inplace
+
+./run_demo.py
+```
diff --git a/python/examples/unwrapping/example.pyx b/python/examples/unwrapping/example.pyx
new file mode 100644
index 0000000000000..552fcda0a7da0
--- /dev/null
+++ b/python/examples/unwrapping/example.pyx
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language=c++
+
+"""Round-trip pyarrow objects through their Arrow C++ representations."""
+
+import pyarrow as pa
+import pyarrow.acero as pac
+import pyarrow.compute as pc
+
+from pyarrow cimport *
+from pyarrow.lib cimport *
+from pyarrow.lib_compute cimport *
+from pyarrow.lib_acero cimport *
+
+
+def unwrap_wrap_arr(array):
+    """Unwrap a pyarrow.Array to its C++ pointer and wrap it again.
+
+    Raises TypeError if ``array`` does not unwrap to a valid pointer.
+    """
+    cdef shared_ptr[CArray] c_arr = pyarrow_unwrap_array(array)
+    # Unwrapping an object of the wrong type yields a null pointer
+    # instead of raising, so check before handing it back to Python.
+    if c_arr.get() == NULL:
+        raise TypeError("not a pyarrow.Array")
+    return pyarrow_wrap_array(c_arr)
+
+
+def unwrap_wrap_declaration(declaration):
+    """Unwrap an acero Declaration to its C++ value and wrap it again."""
+    cdef CDeclaration c_decl = pyarrow_unwrap_declaration(declaration)
+    return pyarrow_wrap_declaration(c_decl)
+
+
+def unwrap_wrap_options(options):
+    """Unwrap acero ExecNodeOptions to its C++ pointer and wrap it again.
+
+    Raises TypeError if unwrapping yields a null pointer.
+    """
+    cdef shared_ptr[CExecNodeOptions] c_opts
+    c_opts = pyarrow_unwrap_exec_node_options(options)
+    if c_opts.get() == NULL:
+        raise TypeError("not a pyarrow.acero.ExecNodeOptions")
+    return pyarrow_wrap_exec_node_options(c_opts)
+
+
+def unwrap_wrap_expression(expression):
+    """Unwrap a compute Expression to its C++ value and wrap it again."""
+    cdef CExpression c_expr = pyarrow_unwrap_expression(expression)
+    return pyarrow_wrap_expression(c_expr)
+
+
+def run_test():
+    """Exercise each round-trip helper and print the results."""
+    print("Starting demo")
+    arr = pa.array(["a", "b", "a"])
+
+    print("Wrapping and unwrapping array")
+    arr = unwrap_wrap_arr(arr)
+    print(arr)
+
+    print("Constructing a table")
+    table = pa.Table.from_arrays([arr], names=["foo"])
+    print(table)
+
+    expression = (pc.field("foo") == pc.scalar("a"))
+    print("Wrapping and unwrapping expression")
+    expression = unwrap_wrap_expression(expression)
+    print(expression)
+
+    print("Filtering the table")
+    table = table.filter(expression)
+
+    print("Running a no-op acero node")
+    options = pac.TableSourceNodeOptions(table)
+    options = unwrap_wrap_options(options)
+
+    source_node = pac.Declaration("table_source", options, [])
+    source_node = unwrap_wrap_declaration(source_node)
+    print(source_node.to_table())
diff --git 
a/python/examples/unwrapping/run_demo.py b/python/examples/unwrapping/run_demo.py
new file mode 100755
index 0000000000000..1f85f6d039d51
--- /dev/null
+++ b/python/examples/unwrapping/run_demo.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Run the wrap/unwrap demo (build first: python setup.py build_ext --inplace)."""
+
+import example
+
+if __name__ == '__main__':
+    example.run_test()
diff --git a/python/examples/unwrapping/setup.py b/python/examples/unwrapping/setup.py
new file mode 100644
index 0000000000000..b41e4f4219ab0
--- /dev/null
+++ b/python/examples/unwrapping/setup.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Build the ``example`` Cython extension against the installed pyarrow."""
+
+import os
+
+from setuptools import setup
+from Cython.Build import cythonize
+import numpy as np
+import pyarrow as pa
+
+
+ext_modules = cythonize("example.pyx")
+
+for ext in ext_modules:
+    # The Numpy C headers are currently required
+    ext.include_dirs.append(np.get_include())
+    ext.include_dirs.append(pa.get_include())
+    ext.libraries.extend(pa.get_libraries())
+    ext.library_dirs.extend(pa.get_library_dirs())
+
+    if os.name == 'posix':
+        # Compile the generated C++ source as C++17.
+        ext.extra_compile_args.append('-std=c++17')
+
+setup(ext_modules=ext_modules)
diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd index 4d109fc660e08..2dbfa911b4784 100644 --- a/python/pyarrow/includes/libarrow_python.pxd +++ b/python/pyarrow/includes/libarrow_python.pxd @@ -256,6 +256,10 @@ cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py": int import_pyarrow() except -1 +cdef extern from "arrow/python/pyarrow_compute.h" namespace "arrow::py": + int import_pyarrow_compute() except -1 + + cdef extern from "arrow/python/common.h" namespace "arrow::py": c_bool IsPyError(const CStatus& status) void RestorePyError(const CStatus& status) except * diff --git a/python/pyarrow/lib_compute.pyx b/python/pyarrow/lib_compute.pyx index ffe9083eff865..e005332792f87 100644 --- a/python/pyarrow/lib_compute.pyx +++ b/python/pyarrow/lib_compute.pyx @@ -22,6 +22,8 @@ from pyarrow.lib cimport * from pyarrow._compute cimport Expression +import_pyarrow_compute() + cdef 
api bint pyarrow_is_expression(object expression): return isinstance(expression, Expression) diff --git a/python/pyarrow/src/arrow/python/pyarrow.cc b/python/pyarrow/src/arrow/python/pyarrow.cc index d58d971b63cc2..b02a1618d491a 100644 --- a/python/pyarrow/src/arrow/python/pyarrow.cc +++ b/python/pyarrow/src/arrow/python/pyarrow.cc @@ -18,7 +18,6 @@ #include #include "arrow/array.h" -#include "arrow/compute/expression.h" #include "arrow/table.h" #include "arrow/tensor.h" #include "arrow/type.h" @@ -66,8 +65,6 @@ DEFINE_WRAP_FUNCTIONS(tensor, std::shared_ptr, out) DEFINE_WRAP_FUNCTIONS(batch, std::shared_ptr, out) DEFINE_WRAP_FUNCTIONS(table, std::shared_ptr, out) -DEFINE_WRAP_FUNCTIONS(expression, compute::Expression, out.is_valid()) - namespace internal { int check_status(const Status& status) { return ::pyarrow_internal_check_status(status); } diff --git a/python/pyarrow/src/arrow/python/pyarrow.h b/python/pyarrow/src/arrow/python/pyarrow.h index 73b896ced44dc..277b919c35c62 100644 --- a/python/pyarrow/src/arrow/python/pyarrow.h +++ b/python/pyarrow/src/arrow/python/pyarrow.h @@ -41,10 +41,6 @@ class Status; class Table; class Tensor; -namespace compute { -class Expression; -} - namespace py { // Returns 0 on success, -1 on error. @@ -70,8 +66,6 @@ DECLARE_WRAP_FUNCTIONS(tensor, std::shared_ptr) DECLARE_WRAP_FUNCTIONS(batch, std::shared_ptr) DECLARE_WRAP_FUNCTIONS(table, std::shared_ptr
) -DECLARE_WRAP_FUNCTIONS(expression, compute::Expression) - namespace internal { // If status is ok, return 0. diff --git a/python/pyarrow/src/arrow/python/pyarrow_acero.h b/python/pyarrow/src/arrow/python/pyarrow_acero.h index 95b7832193824..87eac92f6986a 100644 --- a/python/pyarrow/src/arrow/python/pyarrow_acero.h +++ b/python/pyarrow/src/arrow/python/pyarrow_acero.h @@ -25,7 +25,7 @@ extern "C++" { namespace arrow { -// Forward declarations. Actual wrappers/unwrappers are in pyarrow.{h,cc} +// Forward declarations. Actual wrappers/unwrappers are in pyarrow_acero.{h,cc} namespace acero { struct Declaration; class ExecNodeOptions; @@ -38,9 +38,6 @@ ARROW_PYTHON_EXPORT int import_pyarrow_acero(); DECLARE_WRAP_FUNCTIONS(declaration, acero::Declaration) DECLARE_WRAP_FUNCTIONS(exec_node_options, std::shared_ptr) -// If status is ok, return 0. -// If status is not ok, set Python error indicator and return -1. -ARROW_PYTHON_EXPORT int check_status(const Status& status); } // namespace py } // namespace arrow diff --git a/python/pyarrow/src/arrow/python/pyarrow_api.h b/python/pyarrow/src/arrow/python/pyarrow_api.h index 3380361e2803f..a476e55a2a111 100644 --- a/python/pyarrow/src/arrow/python/pyarrow_api.h +++ b/python/pyarrow/src/arrow/python/pyarrow_api.h @@ -17,4 +17,3 @@ // For backward compatibility. #include "arrow/python/lib_api.h" -#include "arrow/python/lib_compute_api.h" diff --git a/python/pyarrow/src/arrow/python/pyarrow_compute.cc b/python/pyarrow/src/arrow/python/pyarrow_compute.cc new file mode 100644 index 0000000000000..9406e0e12f1f1 --- /dev/null +++ b/python/pyarrow/src/arrow/python/pyarrow_compute.cc @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/expression.h"
+#include "arrow/python/lib_compute_api.h"
+#include "arrow/python/pyarrow_compute.h"
+#include "arrow/python/wrap_macros.h"
+
+namespace arrow {
+namespace py {
+
+int import_pyarrow_compute() { return ::import_pyarrow__lib_compute(); }
+
+DEFINE_WRAP_FUNCTIONS(expression, compute::Expression, out.is_valid())
+
+}  // namespace py
+}  // namespace arrow
diff --git a/python/pyarrow/src/arrow/python/pyarrow_compute.h b/python/pyarrow/src/arrow/python/pyarrow_compute.h
new file mode 100644
index 0000000000000..52cf1c1480354
--- /dev/null
+++ b/python/pyarrow/src/arrow/python/pyarrow_compute.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/python/visibility.h"
+#include "arrow/python/wrap_macros.h"
+
+// Work around ARROW-2317 (C linkage warning from Cython)
+extern "C++" {
+
+namespace arrow {
+
+// Forward declaration only; pyarrow_compute.cc includes
+// arrow/compute/expression.h and defines the wrappers/unwrappers.
+namespace compute {
+class Expression;
+}  // namespace compute
+
+namespace py {
+
+// Returns 0 on success, -1 on error.
+ARROW_PYTHON_EXPORT int import_pyarrow_compute();
+
+DECLARE_WRAP_FUNCTIONS(expression, compute::Expression)
+
+}  // namespace py
+}  // namespace arrow
+
+}  // extern "C++"