Skip to content

Commit

Permalink
Merge pull request apache#4 from jorisvandenbossche/dataset-parquet-encryption-cython-refactor
Browse files Browse the repository at this point in the history

Dataset parquet encryption cython refactor
  • Loading branch information
tolleybot authored Oct 10, 2023
2 parents 46cc668 + 6777353 commit 9df68d3
Show file tree
Hide file tree
Showing 11 changed files with 431 additions and 409 deletions.
14 changes: 6 additions & 8 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,12 @@ if(PYARROW_BUILD_PARQUET_ENCRYPTION)
else()
list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
endif()
message(STATUS "Parquet Encryption Enabled")
else()
message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
endif()
else()
message(STATUS "Parquet Encryption is NOT Enabled")
endif()

if(PYARROW_BUILD_HDFS)
Expand Down Expand Up @@ -627,6 +630,9 @@ if(PYARROW_BUILD_PARQUET)
endif()
if(PYARROW_BUILD_DATASET)
list(APPEND CYTHON_EXTENSIONS _dataset_parquet)
if(PYARROW_BUILD_PARQUET_ENCRYPTION)
list(APPEND CYTHON_EXTENSIONS _dataset_parquet_encryption)
endif()
endif()
endif()

Expand Down Expand Up @@ -713,14 +719,6 @@ endif()
# Error on any warnings not already explicitly ignored.
set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--warning-errors")

# Log whether PYARROW_BUILD_PARQUET_ENCRYPTION is set and append a matching
# "-E" "PARQUET_ENCRYPTION_ENABLED=<1|0>" argument pair to CYTHON_FLAGS.
# NOTE(review): "-E" is presumably Cython's compile-time-env option, making
# PARQUET_ENCRYPTION_ENABLED visible to the .pyx sources - confirm against the
# Cython command-line documentation.
if(PYARROW_BUILD_PARQUET_ENCRYPTION)
message(STATUS "Parquet Encryption Enabled")
list(APPEND CYTHON_FLAGS "-E" "PARQUET_ENCRYPTION_ENABLED=1")
else()
message(STATUS "Parquet Encryption is NOT Enabled")
list(APPEND CYTHON_FLAGS "-E" "PARQUET_ENCRYPTION_ENABLED=0")
endif()

foreach(module ${CYTHON_EXTENSIONS})
string(REPLACE "." ";" directories ${module})
list(GET directories -1 module_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,28 @@
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++
# cython: language_level = 3

"""Dataset support for Parquet file format."""

from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._parquet cimport *

# C++ declarations taken from "arrow/dataset/api.h" (namespace arrow::dataset),
# declared nogil. Two Parquet-specific option classes are exposed to Cython.
#
# CParquetFileWriteOptions: maps arrow::dataset::ParquetFileWriteOptions,
# derived from CFileWriteOptions; exposes the shared_ptr members
# writer_properties and arrow_writer_properties.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
cdef cppclass CParquetFileWriteOptions \
"arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
shared_ptr[WriterProperties] writer_properties
shared_ptr[ArrowWriterProperties] arrow_writer_properties

# CParquetFragmentScanOptions: maps arrow::dataset::ParquetFragmentScanOptions,
# derived from CFragmentScanOptions; exposes the shared_ptr members
# reader_properties and arrow_reader_properties.
cdef cppclass CParquetFragmentScanOptions \
"arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
shared_ptr[CReaderProperties] reader_properties
shared_ptr[ArrowReaderProperties] arrow_reader_properties
from pyarrow.includes.libarrow_dataset_parquet cimport *

from pyarrow._dataset cimport FragmentScanOptions, FileWriteOptions


# Declaration of the ParquetFragmentScanOptions extension type, a subclass of
# FragmentScanOptions (cimported from pyarrow._dataset).
#
# Attributes declared here:
#   parquet_options            - raw CParquetFragmentScanOptions* pointer.
#                                NOTE(review): presumably points into the object
#                                owned by the shared_ptr passed to init() -
#                                confirm in the implementation file.
#   _parquet_decryption_config - arbitrary Python object; its expected type is
#                                not visible from this declaration.
#
# C-level methods declared here (bodies live in the implementation file):
#   init(sp)                   - takes a const shared_ptr[CFragmentScanOptions]&.
#   reader_properties()        - returns a CReaderProperties*.
#   arrow_reader_properties()  - returns an ArrowReaderProperties*.
cdef class ParquetFragmentScanOptions(FragmentScanOptions):
cdef:
CParquetFragmentScanOptions* parquet_options
object _parquet_decryption_config

cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp)
cdef CReaderProperties* reader_properties(self)
cdef ArrowReaderProperties* arrow_reader_properties(self)


# Declaration of the ParquetFileWriteOptions extension type, a subclass of
# FileWriteOptions (cimported from pyarrow._dataset).
#
# Attributes declared here:
#   parquet_options - raw CParquetFileWriteOptions* pointer.
#                     NOTE(review): ownership is not visible here; presumably
#                     owned by the base class's wrapped object - confirm.
#   _properties     - arbitrary Python object; contents are not visible from
#                     this declaration.
cdef class ParquetFileWriteOptions(FileWriteOptions):

cdef:
CParquetFileWriteOptions* parquet_options
object _properties
Loading

0 comments on commit 9df68d3

Please sign in to comment.