Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Arrow - Velox conversion support #4450

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build_pyvelox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:
cp -R /host${{ github.workspace }}/.ccache /output/.ccache &&
ccache -s
CIBW_ENVIRONMENT_PASS_LINUX: CCACHE_DIR BUILD_VERSION
CIBW_TEST_EXTRAS: "tests"
CIBW_TEST_COMMAND: "cd {project}/pyvelox && python -m unittest -v"
CIBW_TEST_SKIP: "*macos*"
CCACHE_DIR: "${{ matrix.os != 'macos-11' && '/output' || github.workspace }}/.ccache"
Expand Down
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ python-clean:
DEBUG=1 ${PYTHON_EXECUTABLE} setup.py clean

python-build:
DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=4 ${PYTHON_EXECUTABLE} setup.py develop
DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=4 ${PYTHON_EXECUTABLE} -m pip install -e .$(extras) --verbose

python-test: python-build
python-test:
$(MAKE) python-build extras="[tests]"
DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v
3 changes: 2 additions & 1 deletion pyvelox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ if(VELOX_BUILD_PYTHON_PACKAGE)
include_directories(SYSTEM ${CMAKE_SOURCE_DIR})
add_definitions(-DCREATE_PYVELOX_MODULE -DVELOX_DISABLE_GOOGLETEST)
# Define our Python module:
pybind11_add_module(pyvelox MODULE pyvelox.cpp serde.cpp signatures.cpp)
pybind11_add_module(pyvelox MODULE pyvelox.cpp serde.cpp signatures.cpp
conversion.cpp)
# Link with Velox:
target_link_libraries(
pyvelox
Expand Down
52 changes: 52 additions & 0 deletions pyvelox/conversion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "conversion.h"
#include <velox/vector/arrow/Abi.h>
#include <velox/vector/arrow/Bridge.h>
#include "context.h"

namespace facebook::velox::py {

namespace py = pybind11;

void addConversionBindings(py::module& m, bool asModuleLocalDefinitions) {
m.def("export_to_arrow", [](VectorPtr& inputVector) {
auto arrowArray = std::make_unique<ArrowArray>();
auto pool_ = PyVeloxContext::getSingletonInstance().pool();
facebook::velox::exportToArrow(inputVector, *arrowArray, pool_);

auto arrowSchema = std::make_unique<ArrowSchema>();
facebook::velox::exportToArrow(inputVector, *arrowSchema);

py::module arrow_module = py::module::import("pyarrow");
py::object array_class = arrow_module.attr("Array");
return array_class.attr("_import_from_c")(
reinterpret_cast<uintptr_t>(arrowArray.get()),
reinterpret_cast<uintptr_t>(arrowSchema.get()));
});

m.def("import_from_arrow", [](py::object inputArrowArray) {
auto arrowArray = std::make_unique<ArrowArray>();
auto arrowSchema = std::make_unique<ArrowSchema>();
inputArrowArray.attr("_export_to_c")(
reinterpret_cast<uintptr_t>(arrowArray.get()),
reinterpret_cast<uintptr_t>(arrowSchema.get()));
auto pool_ = PyVeloxContext::getSingletonInstance().pool();
return importFromArrowAsOwner(*arrowSchema, *arrowArray, pool_);
});
}
} // namespace facebook::velox::py
34 changes: 34 additions & 0 deletions pyvelox/conversion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <pybind11/pybind11.h>

namespace facebook::velox::py {

namespace py = pybind11;

/// Adds bindings for arrow-velox conversion functions to module m.
///
/// The functions registered are `export_to_arrow` (Velox vector to pyarrow
/// array) and `import_from_arrow` (pyarrow array to Velox vector).
///
/// @param m Module to add bindings to.
/// @param asModuleLocalDefinitions If true then these bindings are only
/// visible inside the module. Refer to
/// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#module-local-class-bindings
/// for further details.
/// NOTE(review): the definition in conversion.cpp does not read this flag;
/// confirm whether it is kept only for signature parity with the other
/// add*Bindings functions.
void addConversionBindings(py::module& m, bool asModuleLocalDefinitions = true);

} // namespace facebook::velox::py
2 changes: 2 additions & 0 deletions pyvelox/pyvelox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "pyvelox.h"
#include "conversion.h"
#include "serde.h"
#include "signatures.h"

Expand Down Expand Up @@ -294,6 +295,7 @@ PYBIND11_MODULE(pyvelox, m) {
addVeloxBindings(m);
addSignatureBindings(m);
addSerdeBindings(m);
addConversionBindings(m);
m.attr("__version__") = "dev";
}
#endif
Expand Down
52 changes: 50 additions & 2 deletions pyvelox/test/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import pyarrow as pa
import pyvelox.pyvelox as pv
import unittest


class TestVeloxVector(unittest.TestCase):
Expand Down Expand Up @@ -273,3 +273,51 @@ def test_slice(self):

with self.assertRaises(NotImplementedError):
e = a[3:8:3]

def test_export_to_arrow(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this is great. Can we add export/import tests for all the types we currently support, though?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's certainly good to add some more tests, but just to mention that the actual conversion itself (the C++ code) is also already tested at https://github.com/facebookincubator/velox/blob/main/velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jorisvandenbossche, nevertheless it would be good to add tests for at least the primitive types to make sure there are no inadvertent casts across types, since it's going via C++ to Python etc. Also, PyVelox doesn't support all the types that Velox supports yet :).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the test cases for integers, floats, and strings. Should I add some other test cases as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, while testing multiple test cases, I noticed that the memory pool instance was not handled correctly earlier. So I had to move the Instance struct to a separate header file. Does this approach look good, or is there a better way?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think the latest vectorsaver changes also moved it to a different file. Please merge from latest.

# Each case pairs a Python list with the pyarrow type the exported array is
# expected to report.
test_cases = [
([1, 2, 3], pa.int64()),
([1.1, 2.2, 3.3], pa.float64()),
(["ab", "bc", "ca"], pa.string()),
]
for data, expected_type in test_cases:
# subTest reports each input list separately on failure.
with self.subTest(data=data):
# Build a Velox vector from the list, then export it to a pyarrow array.
vector = pv.from_list(data)
array = pv.export_to_arrow(vector)

# The exported array must keep the type, length and values of the input.
self.assertEqual(array.type, expected_type)
self.assertEqual(len(array), len(data))
self.assertListEqual(array.tolist(), data)

def test_import_from_arrow(self):
    """Import pyarrow arrays and check the size, type and values of the result."""
    test_cases = [
        # pa.int64() is a 64-bit integer, so the Velox counterpart is BIGINT
        # rather than the 32-bit INTEGER.
        ([11, 26, 31], pa.int64(), pv.BigintType()),
        ([0.1, 2.5, 3.9], pa.float64(), pv.DoubleType()),
        (["az", "by", "cx"], pa.string(), pv.VarcharType()),
    ]
    for data, dtype, expected_type in test_cases:
        with self.subTest(data=data):
            array = pa.array(data, type=dtype)
            velox_vector = pv.import_from_arrow(array)

            self.assertEqual(velox_vector.size(), len(data))
            # assertTrue(x, y) only checks that x is truthy and uses y as the
            # failure message, so it never compared the types. Compare the
            # type classes instead; this does not depend on the bound type
            # objects implementing __eq__.
            self.assertIsInstance(velox_vector.dtype(), type(expected_type))
            for i, expected in enumerate(data):
                self.assertEqual(velox_vector[i], expected)

def test_roundtrip_conversion(self):
    """Export a Velox vector to pyarrow, import it back and compare with the input."""
    test_cases = [
        # NOTE(review): pv.from_list on Python ints presumably yields a
        # 64-bit (BIGINT) vector, matching pa.int64() in the export test -
        # confirm against from_list.
        ([41, 92, 13], pv.BigintType()),
        ([17.19, 22.25, 13.3], pv.DoubleType()),
        (["aa1", "bb2", "cc3"], pv.VarcharType()),
    ]
    for data, expected_type in test_cases:
        with self.subTest(data=data):
            vector = pv.from_list(data)
            array = pv.export_to_arrow(vector)

            velox_vector = pv.import_from_arrow(array)
            self.assertEqual(velox_vector.size(), len(data))
            # assertTrue(x, y) only checks that x is truthy and uses y as the
            # failure message, so it never compared the types. Compare the
            # type classes instead; this does not depend on the bound type
            # objects implementing __eq__.
            self.assertIsInstance(velox_vector.dtype(), type(expected_type))
            for i, expected in enumerate(data):
                self.assertEqual(velox_vector[i], expected)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ def build_extension(self, ext):
"typing",
"tabulate",
"typing-inspect",
"pyarrow",
],
extras_require={"tests": ["pyarrow"]},
python_requires=">=3.7",
classifiers=[
"Intended Audience :: Developers",
Expand Down