Skip to content

Commit

Permalink
add pyarrow_compute.h
Browse files Browse the repository at this point in the history
  • Loading branch information
JerAguilon committed Jan 5, 2024
1 parent 07f55c5 commit fd4c877
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 15 deletions.
3 changes: 2 additions & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ set(PYARROW_CPP_SRCS
${PYARROW_CPP_SOURCE_DIR}/python_test.cc
${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
${PYARROW_CPP_SOURCE_DIR}/pyarrow_compute.cc
${PYARROW_CPP_SOURCE_DIR}/serialize.cc
${PYARROW_CPP_SOURCE_DIR}/udf.cc)
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/init.cc
Expand Down Expand Up @@ -379,7 +380,7 @@ if(NOT PYARROW_CPP_LINK_LIBS)
endif()

add_library(arrow_python SHARED ${PYARROW_CPP_SRCS})
file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_BINARY_DIR}/**/*")
#file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_BINARY_DIR}/**/*")
target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR}
${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src)

Expand Down
14 changes: 14 additions & 0 deletions python/examples/unwrapping/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# arrow_wrap_unwrap

This example demonstrates pyarrow's [wrapping and
unwrapping](https://arrow.apache.org/docs/python/integration/extending.html)
functionality, which lets you pass pyarrow objects between Python and Cython or
Arrow C++ code.

Instructions:

```
python setup.py build_ext --inplace
./run_demo.py
```
61 changes: 61 additions & 0 deletions python/examples/unwrapping/example.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# distutils: language=c++

import pyarrow as pa
import pyarrow.acero as pac
import pyarrow.compute as pc

from pyarrow cimport *
from pyarrow.lib cimport *
from pyarrow.lib_compute cimport *
from pyarrow.lib_acero cimport *

def unwrap_wrap_arr(array):
    """Round-trip `array` through its C++ representation.

    The object is unwrapped into a ``shared_ptr[CArray]`` with
    ``pyarrow_unwrap_array`` and immediately wrapped back into a Python
    object with ``pyarrow_wrap_array``; the re-wrapped object is returned.
    """
    cdef shared_ptr[CArray] c_array = pyarrow_unwrap_array(array)
    return pyarrow_wrap_array(c_array)

def unwrap_wrap_declaration(declaration):
    """Round-trip `declaration` through its C++ representation.

    The object is unwrapped into a ``CDeclaration`` with
    ``pyarrow_unwrap_declaration`` and wrapped back into a Python object
    with ``pyarrow_wrap_declaration``; the re-wrapped object is returned.
    """
    cdef CDeclaration c_declaration = pyarrow_unwrap_declaration(declaration)
    return pyarrow_wrap_declaration(c_declaration)

def unwrap_wrap_options(options):
    """Round-trip exec-node `options` through their C++ representation.

    The object is unwrapped into a ``shared_ptr[CExecNodeOptions]`` with
    ``pyarrow_unwrap_exec_node_options`` and wrapped back into a Python
    object with ``pyarrow_wrap_exec_node_options``; the re-wrapped object
    is returned.
    """
    cdef shared_ptr[CExecNodeOptions] c_options = pyarrow_unwrap_exec_node_options(options)
    return pyarrow_wrap_exec_node_options(c_options)

def unwrap_wrap_expression(expression):
    """Round-trip `expression` through its C++ representation.

    The object is unwrapped into a ``CExpression`` with
    ``pyarrow_unwrap_expression`` and wrapped back into a Python object
    with ``pyarrow_wrap_expression``; the re-wrapped object is returned.
    """
    cdef CExpression c_expression = pyarrow_unwrap_expression(expression)
    return pyarrow_wrap_expression(c_expression)


def run_test():
    """Drive the demo end to end, printing progress along the way.

    Each pyarrow object built here (array, expression, exec-node options,
    declaration) is passed through the matching ``unwrap_wrap_*`` helper
    before it is used, so every later step operates on the re-wrapped object.
    """
    print("Starting demo")
    strings = pa.array(["a", "b", "a"])

    print("Wrapping and unwrapping array")
    strings = unwrap_wrap_arr(strings)
    print(strings)

    print("Constructing a table")
    table = pa.Table.from_arrays([strings], names=["foo"])
    print(table)

    # Keep rows whose "foo" column equals "a".
    is_a = pc.field("foo") == pc.scalar("a")
    print("Wrapping and unwrapping expression")
    is_a = unwrap_wrap_expression(is_a)
    print(is_a)

    print("Filtering the table")
    filtered = table.filter(is_a)

    print("Running a no-op acero node")
    source_options = unwrap_wrap_options(pac.TableSourceNodeOptions(filtered))

    # A lone table source with no inputs: it just yields the filtered table.
    source_decl = unwrap_wrap_declaration(
        pac.Declaration("table_source", source_options, [])
    )
    print(source_decl.to_table())


6 changes: 6 additions & 0 deletions python/examples/unwrapping/run_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python3

import example

def main():
    """Run the wrap/unwrap demo exposed by the compiled `example` extension."""
    example.run_test()


if __name__ == '__main__':
    main()
22 changes: 22 additions & 0 deletions python/examples/unwrapping/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from setuptools import setup
from Cython.Build import cythonize

import os
import numpy as np
import pyarrow as pa


# Cythonize the demo module; each resulting extension is then pointed at the
# NumPy and pyarrow headers and at the pyarrow libraries before being built.
ext_modules = cythonize("example.pyx")

for extension in ext_modules:
    # The Numpy C headers are currently required
    extension.include_dirs.extend([np.get_include(), pa.get_include()])
    extension.libraries.extend(pa.get_libraries())
    extension.library_dirs.extend(pa.get_library_dirs())

    # Only POSIX toolchains get the explicit C++17 flag.
    if os.name == 'posix':
        extension.extra_compile_args.append('-std=c++17')

setup(ext_modules=ext_modules)

4 changes: 4 additions & 0 deletions python/pyarrow/includes/libarrow_python.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
int import_pyarrow() except -1


cdef extern from "arrow/python/pyarrow_compute.h" namespace "arrow::py":
int import_pyarrow_compute() except -1


cdef extern from "arrow/python/common.h" namespace "arrow::py":
c_bool IsPyError(const CStatus& status)
void RestorePyError(const CStatus& status) except *
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/lib_compute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from pyarrow.lib cimport *
from pyarrow._compute cimport Expression

import_pyarrow_compute()

cdef api bint pyarrow_is_expression(object expression):
    # True when `expression` is an instance of the Expression class cimported
    # from pyarrow._compute, false otherwise.
    cdef bint is_expression = isinstance(expression, Expression)
    return is_expression

Expand Down
3 changes: 0 additions & 3 deletions python/pyarrow/src/arrow/python/pyarrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <utility>

#include "arrow/array.h"
#include "arrow/compute/expression.h"
#include "arrow/table.h"
#include "arrow/tensor.h"
#include "arrow/type.h"
Expand Down Expand Up @@ -66,8 +65,6 @@ DEFINE_WRAP_FUNCTIONS(tensor, std::shared_ptr<Tensor>, out)
DEFINE_WRAP_FUNCTIONS(batch, std::shared_ptr<RecordBatch>, out)
DEFINE_WRAP_FUNCTIONS(table, std::shared_ptr<Table>, out)

DEFINE_WRAP_FUNCTIONS(expression, compute::Expression, out.is_valid())

namespace internal {

int check_status(const Status& status) { return ::pyarrow_internal_check_status(status); }
Expand Down
6 changes: 0 additions & 6 deletions python/pyarrow/src/arrow/python/pyarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ class Status;
class Table;
class Tensor;

namespace compute {
class Expression;
}

namespace py {

// Returns 0 on success, -1 on error.
Expand All @@ -70,8 +66,6 @@ DECLARE_WRAP_FUNCTIONS(tensor, std::shared_ptr<Tensor>)
DECLARE_WRAP_FUNCTIONS(batch, std::shared_ptr<RecordBatch>)
DECLARE_WRAP_FUNCTIONS(table, std::shared_ptr<Table>)

DECLARE_WRAP_FUNCTIONS(expression, compute::Expression)

namespace internal {

// If status is ok, return 0.
Expand Down
5 changes: 1 addition & 4 deletions python/pyarrow/src/arrow/python/pyarrow_acero.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ extern "C++" {

namespace arrow {

// Forward declarations. Actual wrappers/unwrappers are in pyarrow.{h,cc}
// Forward declarations. Actual wrappers/unwrappers are in pyarrow_acero.{h,cc}
namespace acero {
struct Declaration;
class ExecNodeOptions;
Expand All @@ -38,9 +38,6 @@ ARROW_PYTHON_EXPORT int import_pyarrow_acero();
DECLARE_WRAP_FUNCTIONS(declaration, acero::Declaration)
DECLARE_WRAP_FUNCTIONS(exec_node_options, std::shared_ptr<acero::ExecNodeOptions>)

// If status is ok, return 0.
// If status is not ok, set Python error indicator and return -1.
ARROW_PYTHON_EXPORT int check_status(const Status& status);
} // namespace py
} // namespace arrow

Expand Down
1 change: 0 additions & 1 deletion python/pyarrow/src/arrow/python/pyarrow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@

// For backward compatibility.
#include "arrow/python/lib_api.h"
#include "arrow/python/lib_compute_api.h"
31 changes: 31 additions & 0 deletions python/pyarrow/src/arrow/python/pyarrow_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "arrow/compute/expression.h"

#include "arrow/python/lib_compute_api.h"
#include "arrow/python/pyarrow_compute.h"
#include "arrow/python/wrap_macros.h"

namespace arrow {
namespace py {

// Forwards to the global import_pyarrow__lib_compute() (presumably declared in
// the included lib_compute_api.h) and returns its result unchanged.
int import_pyarrow_compute() {
  const int status = ::import_pyarrow__lib_compute();
  return status;
}

// Expands the wrap_macros.h helper for compute::Expression under the name
// "expression". The third argument (`out.is_valid()`) is presumably the
// validity check applied to the unwrapped value `out` -- confirm against the
// macro definition.
DEFINE_WRAP_FUNCTIONS(expression, compute::Expression, out.is_valid())

} // namespace py
} // namespace arrow
42 changes: 42 additions & 0 deletions python/pyarrow/src/arrow/python/pyarrow_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/python/visibility.h"
#include "arrow/python/wrap_macros.h"

// Work around ARROW-2317 (C linkage warning from Cython)
extern "C++" {

namespace arrow {

// Forward declaration only, so this header does not have to include
// arrow/compute/expression.h. The wrap/unwrap functions for this type are
// declared below and defined in pyarrow_compute.cc.
namespace compute {
class Expression;
}

namespace py {

// Implemented in pyarrow_compute.cc by forwarding to
// import_pyarrow__lib_compute(). The Cython declaration of this function uses
// `except -1`, so -1 presumably signals an error (with 0 on success, as
// documented for import_pyarrow()) -- confirm.
ARROW_PYTHON_EXPORT int import_pyarrow_compute();

// Declares the wrap/unwrap helpers for compute::Expression; see wrap_macros.h
// for the exact signatures the macro generates.
DECLARE_WRAP_FUNCTIONS(expression, compute::Expression)

} // namespace py
} // namespace arrow

} // extern "C++"

0 comments on commit fd4c877

Please sign in to comment.