Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1d54a4f
Starting changes
sofia-tekdatum Jan 25, 2026
601849e
First attempt at adding new external encryption to meson.build
sofia-tekdatum Jan 27, 2026
cdd51cc
Fixing typo
sofia-tekdatum Jan 27, 2026
f65cc16
Fixing typo
sofia-tekdatum Jan 27, 2026
7b88e4b
Fixing typo
sofia-tekdatum Jan 27, 2026
c6207ed
Adding dbps external project
sofia-tekdatum Jan 27, 2026
3d8bb08
Fix path
sofia-tekdatum Jan 27, 2026
91f196d
Restructuring subproject
sofia-tekdatum Jan 27, 2026
efc52ef
Different dependency
sofia-tekdatum Jan 27, 2026
0e83c18
Another attempt
sofia-tekdatum Jan 27, 2026
701866e
Correcting directory name
sofia-tekdatum Jan 27, 2026
0c409b4
Another attempt
sofia-tekdatum Jan 27, 2026
c88ad32
Removing absolute paths
sofia-tekdatum Jan 27, 2026
24e247a
Removing absolute paths
sofia-tekdatum Jan 27, 2026
f8979f5
Removing absolute paths
sofia-tekdatum Jan 27, 2026
dad2c62
Removing absolute paths
sofia-tekdatum Jan 27, 2026
5602a8f
Add missing dependencies to tcb and magic enum
sofia-tekdatum Jan 27, 2026
b9f6899
Changing dep download
sofia-tekdatum Jan 27, 2026
6858b57
Linter and test dependencies
sofia-tekdatum Jan 27, 2026
01d10b3
typo
sofia-tekdatum Jan 27, 2026
05083e9
Benchmark lib failing
sofia-tekdatum Jan 27, 2026
6117f47
Last attempt today
sofia-tekdatum Jan 27, 2026
653fe31
Last attempt today
sofia-tekdatum Jan 27, 2026
f0dcaa1
Linker errors
sofia-tekdatum Jan 27, 2026
03ec9df
Linker errors
sofia-tekdatum Jan 27, 2026
5e45d22
Linker errors
sofia-tekdatum Jan 27, 2026
5a2c680
Linker errors
sofia-tekdatum Jan 27, 2026
0449773
Linker errors
sofia-tekdatum Jan 27, 2026
3df6c29
Trying to fix cycle
sofia-tekdatum Jan 27, 2026
f33d31e
Patching DBPS build file
sofia-tekdatum Jan 27, 2026
7c0eb37
Patching DBPS build file
sofia-tekdatum Jan 27, 2026
aa9b0e0
Importing DBPS as headers-only in meson
argmarco-tkd Jan 27, 2026
bd0ac7a
fix meson
argmarco-tkd Jan 28, 2026
7334036
making meson build test shared libs for encryption/external
argmarco-tkd Jan 28, 2026
31a30c5
improved logging
argmarco-tkd Jan 28, 2026
3896539
additional printlns
argmarco-tkd Feb 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cpp/meson.options
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ option(
description: 'Arbitrary string that identifies the kind of package (for informational purposes)',
)
option('parquet', type: 'feature', description: 'Build the Parquet libraries')
# NOTE: the Meson build does not build the DBPS shared libraries; enabling this
# option currently only triggers a configure-time warning in
# cpp/src/parquet/meson.build. Default to 'disabled' so that a default
# configuration stays free of that warning.
option(
    'parquet_build_dbps_libs',
    type: 'feature',
    value: 'disabled',
    description: 'Build DBPS external libraries',
)
option(
'parquet_build_executables',
type: 'feature',
Expand Down
14 changes: 10 additions & 4 deletions cpp/src/arrow/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -385,13 +385,17 @@ if needs_filesystem
{'BUILD_PERFORMANCE_TESTS': 'FALSE'},
{'BUILD_SAMPLES': 'FALSE'},
{'BUILD_TESTING': 'FALSE'},
{'BUILD_WINDOWS_UWP': 'TRUE'},
{'CMAKE_UNITY_BUILD': 'FALSE'},
{'DISABLE_AZURE_CORE_OPENTELEMETRY': 'TRUE'},
{'ENV{AZURE_SDK_DISABLE_AUTO_VCPKG}': 'TRUE'},
{'WARNINGS_AS_ERRORS': 'FALSE'},
)
azure_opt.append_compile_args('cpp', '-fPIC')
# Platform-specific Azure SDK options: Windows hosts get the BUILD_WINDOWS_UWP
# CMake define; every other host gets -fPIC appended to the C++ compile args.
if host_machine.system() == 'windows'
    azure_opt.add_cmake_defines({'BUILD_WINDOWS_UWP': 'TRUE'})
else
    azure_opt.append_compile_args('cpp', '-fPIC')
endif
azure_proj = cmake.subproject('azure', options: azure_opt)

azure_dep = declare_dependency(
Expand Down Expand Up @@ -621,20 +625,22 @@ if needs_testing
boost_opt = cmake.subproject_options()
boost_opt.add_cmake_defines(
{'BOOST_INCLUDE_LIBRARIES': 'filesystem;system'},
# Keep Boost's CMake graph minimal for Meson's CMake introspection.
{'BUILD_TESTING': 'OFF'},
{'BOOST_ENABLE_TESTING': 'OFF'},
{'CMAKE_UNITY_BUILD': 'OFF'},
)
boost_proj = cmake.subproject('boost', options: boost_opt)
filesystem_dep = boost_proj.dependency('boost_filesystem')
endif

gtest_dep = dependency('gtest')
gtest_main_dep = dependency('gtest_main')
gtest_dep = dependency('gtest')
gmock_dep = dependency('gmock')
else
filesystem_dep = disabler()
gtest_dep = disabler()
gtest_main_dep = disabler()
gtest_dep = disabler()
gmock_dep = disabler()
endif

Expand Down
28 changes: 27 additions & 1 deletion cpp/src/parquet/encryption/external/test_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <filesystem>
#include <string>
#include <vector>
#include <iostream>

#ifdef __APPLE__
# include <mach-o/dyld.h>
Expand Down Expand Up @@ -57,6 +58,21 @@ std::string TestUtils::GetExecutableDirectory() {
}

std::string TestUtils::GetTestLibraryPath() {
// Strong override: reuse the same env var as the Python tooling
// (`python/scripts/base_app.py`): DBPA_LIBRARY_PATH.
//
// This allows CI/build systems to provide the exact path to the DBPA agent shared
// library, avoiding reliance on executable-path heuristics or current working directory.
const char* explicit_path = std::getenv("DBPA_LIBRARY_PATH");
if (explicit_path && explicit_path[0]) {
std::string p(explicit_path);
if (std::filesystem::exists(p)) {
return p;
}
throw std::runtime_error("DBPA_LIBRARY_PATH is set but the file does not exist: " +
p);
}

// Check for environment variable to override the executable directory
const char* cwd_override = std::getenv("PARQUET_TEST_LIBRARY_CWD");
std::string base_path;
Expand All @@ -83,7 +99,17 @@ std::string TestUtils::GetTestLibraryPath() {
}
}

throw std::runtime_error("Could not find library");
// Provide a detailed error to make CI failures diagnosable.
std::string msg = "Could not find DBPA test agent library. Tried:\n";
for (const auto& filename : possible_filenames) {
for (const auto& directory : possible_directories) {
msg += " - " + (directory + filename) + "\n";
}
}
msg += "PARQUET_TEST_LIBRARY_CWD=";
msg += (cwd_override && cwd_override[0]) ? cwd_override : "<unset>";
msg += "\n";
throw std::runtime_error(msg);
}

} // namespace parquet::encryption::external::test
39 changes: 37 additions & 2 deletions cpp/src/parquet/encryption/external_dbpa_encryption.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,17 @@ std::unique_ptr<dbps::external::DataBatchProtectionAgentInterface> LoadAndInitia
// Step 1: Get path to the shared library
auto it = configuration_properties.find(SHARED_LIBRARY_PATH_KEY);
if (it == configuration_properties.end()) {
const auto msg = "Required configuration key '" + SHARED_LIBRARY_PATH_KEY +
"' not found in configuration_properties";
std::string msg = "Required configuration key '" + SHARED_LIBRARY_PATH_KEY +
"' not found in configuration_properties. Present keys: ";
bool first = true;
for (const auto& kv : configuration_properties) {
if (!first) msg += ", ";
first = false;
msg += kv.first;
}
if (first) {
msg += "<none>";
}
ARROW_LOG(ERROR) << msg;
throw ParquetException(msg);
}
Expand Down Expand Up @@ -430,6 +439,19 @@ ExternalDBPAEncryptorAdapter* ExternalDBPAEncryptorAdapterFactory::GetEncryptor(
auto app_context = external_file_encryption_properties->app_context();
auto connection_config_for_algorithm = configuration_properties.at(algorithm);

// Diagnostic dump of the configuration map selected for this algorithm; only
// produced when DEBUG logging is enabled.
// NOTE(review): every value is logged verbatim. If this configuration can carry
// credentials or tokens they will end up in the logs - confirm, and redact if so.
// NOTE(review): the message names EXTERNAL_DBPA_V1 while the map was looked up
// via `algorithm` - confirm the two always match.
if (::arrow::util::ArrowLog::IsLevelEnabled(
        ::arrow::util::ArrowLogLevel::ARROW_DEBUG)) {
  ARROW_LOG(DEBUG) << "ExternalDBPAEncryptorAdapterFactory::GetEncryptor - "
                      "selected configuration_properties for EXTERNAL_DBPA_V1:";
  // The loop body never runs for an empty map, so exactly one of the two
  // branches below produces output.
  for (const auto& [name, value] : connection_config_for_algorithm) {
    ARROW_LOG(DEBUG) << " [" << name << "]: [" << value << "]";
  }
  if (connection_config_for_algorithm.empty()) {
    ARROW_LOG(DEBUG) << " <empty map>";
  }
}

std::string key_id;
try {
auto key_metadata =
Expand Down Expand Up @@ -659,6 +681,19 @@ std::unique_ptr<DecryptorInterface> ExternalDBPADecryptorAdapterFactory::GetDecr
auto connection_config_for_algorithm = configuration_properties.at(algorithm);
auto key_value_metadata = column_chunk_metadata->key_value_metadata();

// Diagnostic dump of the configuration map selected for this algorithm; only
// produced when DEBUG logging is enabled.
// NOTE(review): every value is logged verbatim. If this configuration can carry
// credentials or tokens they will end up in the logs - confirm, and redact if so.
// NOTE(review): the message names EXTERNAL_DBPA_V1 while the map was looked up
// via `algorithm` - confirm the two always match.
if (::arrow::util::ArrowLog::IsLevelEnabled(
        ::arrow::util::ArrowLogLevel::ARROW_DEBUG)) {
  ARROW_LOG(DEBUG) << "ExternalDBPADecryptorAdapterFactory::GetDecryptor - "
                      "selected configuration_properties for EXTERNAL_DBPA_V1:";
  // The loop body never runs for an empty map, so exactly one of the two
  // branches below produces output.
  for (const auto& [name, value] : connection_config_for_algorithm) {
    ARROW_LOG(DEBUG) << " [" << name << "]: [" << value << "]";
  }
  if (connection_config_for_algorithm.empty()) {
    ARROW_LOG(DEBUG) << " <empty map>";
  }
}

std::string key_id;
try {
auto key_metadata = KeyMetadata::Parse(crypto_metadata->key_metadata());
Expand Down
123 changes: 116 additions & 7 deletions cpp/src/parquet/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ if not thrift_dep.found()
{
'BUILD_COMPILER': 'OFF',
'BUILD_EXAMPLES': 'OFF',
'BUILD_TESTING': 'OFF',
'BUILD_TESTS': 'OFF',
'BUILD_TUTORIALS': 'OFF',
'CMAKE_UNITY_BUILD': 'OFF',
'WITH_AS3': 'OFF',
Expand All @@ -89,6 +91,10 @@ endif

parquet_deps = [arrow_dep, rapidjson_dep, thrift_dep]

# Defaults used when the DBPA test-agent library is not built (it is only built
# when encryption+testing are enabled).
#
# Use an empty list rather than disabler(): a disabler passed as any argument of
# a call (including `depends:`) makes Meson skip the whole call, so
# test(..., depends: dbpa_test_agent_lib) would silently drop every parquet test
# whenever the agent library is not built. An empty list is a valid, empty
# `depends:` value.
dbpa_test_agent_lib = []
dbpa_test_agent_path = ''

if needs_parquet_encryption or get_option('parquet_require_encryption').auto()
openssl_dep = dependency('openssl', required: needs_parquet_encryption)
else
Expand All @@ -99,8 +105,15 @@ if openssl_dep.found()
parquet_deps += openssl_dep

parquet_srcs += files(
'encryption/aes_encryption.cc',
'encryption/crypto_factory.cc',
'encryption/encryption_internal.cc',
'encryption/encoding_properties.cc',
'encryption/encryption_utils.cc',
'encryption/external/dbpa_enum_utils.cc',
'encryption/external/dbpa_executor.cc',
'encryption/external/dbpa_library_wrapper.cc',
'encryption/external/loadable_encryptor_utils.cc',
'encryption/external_dbpa_encryption.cc',
'encryption/file_key_unwrapper.cc',
'encryption/file_key_wrapper.cc',
'encryption/file_system_key_material_store.cc',
Expand All @@ -112,18 +125,87 @@ if openssl_dep.found()
'encryption/local_wrap_kms_client.cc',
'encryption/openssl_internal.cc',
)

# External DBPA integration and its header-only deps are only relevant when
# encryption support is enabled (i.e., OpenSSL is available).
# NOTE(review): this condition repeats the one that guards the OpenSSL lookup
# earlier in this file; confirm it can be false once openssl_dep.found() is true.
if needs_parquet_encryption or get_option('parquet_require_encryption').auto()
# Helper libraries; each falls back to the named wrap subproject when no
# matching dependency is found.
tcb_span_dep = dependency('tcb_span', fallback: ['tcb-span', 'tcb_span_dep'])
magic_enum_dep = dependency(
'magic_enum_header_only',
fallback: ['magic-enum', 'magic_enum_dep'],
)

# DBPS interface is header-only (dbpa_interface.h and friends).
#
# IMPORTANT: Do not use Meson's CMake interpreter for DBPS here. DBPS' CMake
# target graph (CTest/CDash targets, generator expressions, etc.) has proven
# incompatible with Meson's CMake dependency extractor and can fail Meson
# configuration with "Cycle in CMake inputs/dependencies detected".
#
# Instead we use a tiny Meson wrapper (cpp/subprojects/packagefiles/dbps_agent)
# that only exposes the header-only dependency. The DBPS shared libraries are
# NOT built by Meson; see the warning emitted for parquet_build_dbps_libs below.
dbps_sp = subproject('dbps_agent')
dbps_interface_dep = dbps_sp.get_variable('dbps_interface_dep')

parquet_deps += [dbps_interface_dep, tcb_span_dep, magic_enum_dep]

# parquet_build_dbps_libs has no effect under Meson; tell the user what to do
# instead when they explicitly enable it.
if get_option('parquet_build_dbps_libs').enabled()
warning(
'Meson does not build DBPS shared libraries (parquet_build_dbps_libs is a no-op). ' +
'Provide your own agent shared library and set configuration_properties["agent_library_path"], ' +
'or build DBPS via its CMake build separately.',
)
endif

# Build the in-tree DBPA test agent shared library used by external encryption tests.
# CMake builds this as `DBPATestAgent` when ARROW_TESTING is enabled; Meson needs an
# equivalent target so tests can dlopen() `libDBPATestAgent.so`.
#
# Keep it Meson-native (no CMake), and place the output next to parquet test
# executables so `TestUtils::GetTestLibraryPath()` can find it via the executable dir.
if needs_testing
dbpa_test_agent_lib = shared_library(
'DBPATestAgent',
sources: files('encryption/external/dbpa_test_agent.cc'),
include_directories: include_directories('..'),
# Keep this test agent as self-contained as possible. It is dlopen()'d
# by tests, so avoid unnecessary runtime dependencies (e.g., libarrow.so)
# which can differ between Meson/CMake CI environments.
dependencies: [magic_enum_dep, tcb_span_dep, dbps_interface_dep],
install: false,
gnu_symbol_visibility: 'default',
)
# Absolute path of the built library; exported to tests as DBPA_LIBRARY_PATH.
dbpa_test_agent_path = dbpa_test_agent_lib.full_path()
endif
endif
else
parquet_srcs += files('encryption/encryption_internal_nossl.cc')
parquet_srcs += files('encryption/aes_encryption_nossl.cc')
endif


# Symbol visibility used for the Parquet library.
#
# The CMake build decides which symbols are visible through explicit export
# macros (plus a version script on ELF); the Meson build does not replicate that
# machinery. With hidden visibility, test executables cannot link against
# symbols that tests use but that are not exported (e.g. EncodingProperties,
# IsParquetCipherSupported). Relax to default visibility whenever
# tests/benchmarks are built to avoid those link failures in Meson CI.
parquet_symbol_visibility = needs_testing ? 'default' : 'inlineshidden'

parquet_lib = library(
'arrow-parquet',
sources: parquet_srcs,
dependencies: parquet_deps,
gnu_symbol_visibility: 'inlineshidden',
gnu_symbol_visibility: parquet_symbol_visibility,
)

parquet_dep = declare_dependency(link_with: parquet_lib)
parquet_dep = declare_dependency(
link_with: parquet_lib,
dependencies: parquet_deps,
)

subdir('api')
subdir('arrow')
Expand Down Expand Up @@ -216,6 +298,7 @@ parquet_tests = {
'writer-test': {
'sources': files(
'column_writer_test.cc',
'encryption/external/test_utils.cc',
'file_serialize_test.cc',
'stream_writer_test.cc',
),
Expand All @@ -226,6 +309,7 @@ parquet_tests = {
'arrow/arrow_reader_writer_test.cc',
'arrow/arrow_statistics_test.cc',
'arrow/variant_test.cc',
'encryption/external/test_utils.cc',
),
},
'arrow-internals-test': {
Expand All @@ -240,18 +324,26 @@ parquet_tests = {
'arrow/arrow_schema_test.cc',
),
},
'file_deserialize_test': {'sources': files('file_deserialize_test.cc')},
'file_deserialize_test': {
'sources': files('file_deserialize_test.cc', 'encryption/external/test_utils.cc'),
},
'schema_test': {'sources': files('schema_test.cc')},
}

if needs_parquet_encryption
parquet_tests += {
'encryption-test': {
'sources': files(
'encryption/encryption_internal_test.cc',
'encryption/aes_encryption_test.cc',
'encryption/crypto_factory_test.cc',
'encryption/encoding_properties_test.cc',
'encryption/external/test_utils.cc',
'encryption/external_dbpa_encryption_test.cc',
'encryption/per_column_encryption_test.cc',
'encryption/properties_test.cc',
'encryption/read_configurations_test.cc',
'encryption/test_encryption_util.cc',
'encryption/test_in_memory_kms.cc',
'encryption/write_configurations_test.cc',
),
},
Expand Down Expand Up @@ -297,7 +389,24 @@ foreach key, val : parquet_tests
sources: val['sources'] + files('test_util.cc'),
dependencies: parquet_test_dep,
)
test(test_name, exc)
# Ensure the DBPA test agent is built before running any tests that may load it.
# (No-op when dbpa_test_agent_lib is disabled/unset.)
test(
test_name,
exc,
depends: dbpa_test_agent_lib,
env: {
# Make DBPATestAgent lookup deterministic under Meson. Some CI setups may
# not allow /proc/self/exe resolution or run tests with unexpected cwd.
'PARQUET_TEST_LIBRARY_CWD': meson.current_build_dir(),
# Reuse the standard env var used by Python tooling (`base_app.py`).
# Prefer this in C++ as well.
'DBPA_LIBRARY_PATH': dbpa_test_agent_path,
# Make DBPA-related code emit useful logs in CI (opt-in via env).
# This helps debug why Meson runs see unexpected configuration_properties.
'PARQUET_DBPA_LOG_LEVEL': 'DEBUG',
},
)
endforeach

parquet_benchmarks = {
Expand Down
23 changes: 23 additions & 0 deletions cpp/subprojects/dbps_agent.wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Source archive of DataBatchProtectionService pinned to a single commit
# (the commit id appears in the directory, URL and file name below).
[wrap-file]
directory = DataBatchProtectionService-6206fb0e27556a0df9160364caa3819e4af3fe0f
source_url = https://github.com/protegrity/DataBatchProtectionService/archive/6206fb0e27556a0df9160364caa3819e4af3fe0f.tar.gz
source_filename = dbps_agent-6206fb0e27556a0df9160364caa3819e4af3fe0f.tar.gz
source_hash = 9c95a1fec0c9851867a776c3241d3feb59b07bd7a50e653d6214e07a8ad62419
# Overlay directory under subprojects/packagefiles/ whose files are copied on
# top of the extracted sources (the Meson wrapper for DBPS lives there).
patch_directory = dbps_agent
Loading
Loading