Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cpp/meson.options
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ option(
description: 'Arbitrary string that identifies the kind of package (for informational purposes)',
)
option('parquet', type: 'feature', description: 'Build the Parquet libraries')
# NOTE: cpp/src/parquet/meson.build currently treats this option as a no-op:
# when it is 'enabled' (the default here) the build only prints a warning that
# DBPS shared libraries are not built by Meson and must be provided separately.
option(
'parquet_build_dbps_libs',
type: 'feature',
value: 'enabled',
description: 'Build DBPS external libraries',
)
option(
'parquet_build_executables',
type: 'feature',
Expand Down
14 changes: 10 additions & 4 deletions cpp/src/arrow/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -385,13 +385,17 @@ if needs_filesystem
{'BUILD_PERFORMANCE_TESTS': 'FALSE'},
{'BUILD_SAMPLES': 'FALSE'},
{'BUILD_TESTING': 'FALSE'},
{'BUILD_WINDOWS_UWP': 'TRUE'},
{'CMAKE_UNITY_BUILD': 'FALSE'},
{'DISABLE_AZURE_CORE_OPENTELEMETRY': 'TRUE'},
{'ENV{AZURE_SDK_DISABLE_AUTO_VCPKG}': 'TRUE'},
{'WARNINGS_AS_ERRORS': 'FALSE'},
)
azure_opt.append_compile_args('cpp', '-fPIC')
# Platform-specific Azure SDK settings: the UWP define is applied on Windows
# hosts only, while -fPIC is passed on every other host.
if host_machine.system() == 'windows'
    azure_opt.add_cmake_defines({'BUILD_WINDOWS_UWP': 'TRUE'})
else
    azure_opt.append_compile_args('cpp', '-fPIC')
endif
azure_proj = cmake.subproject('azure', options: azure_opt)

azure_dep = declare_dependency(
Expand Down Expand Up @@ -621,20 +625,22 @@ if needs_testing
boost_opt = cmake.subproject_options()
boost_opt.add_cmake_defines(
{'BOOST_INCLUDE_LIBRARIES': 'filesystem;system'},
# Keep Boost's CMake graph minimal for Meson's CMake introspection.
{'BUILD_TESTING': 'OFF'},
{'BOOST_ENABLE_TESTING': 'OFF'},
{'CMAKE_UNITY_BUILD': 'OFF'},
)
boost_proj = cmake.subproject('boost', options: boost_opt)
filesystem_dep = boost_proj.dependency('boost_filesystem')
endif

gtest_dep = dependency('gtest')
gtest_main_dep = dependency('gtest_main')
gtest_dep = dependency('gtest')
gmock_dep = dependency('gmock')
else
filesystem_dep = disabler()
gtest_dep = disabler()
gtest_main_dep = disabler()
gtest_dep = disabler()
gmock_dep = disabler()
endif

Expand Down
28 changes: 27 additions & 1 deletion cpp/src/parquet/encryption/external/test_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <filesystem>
#include <string>
#include <vector>
#include <iostream>

#ifdef __APPLE__
# include <mach-o/dyld.h>
Expand Down Expand Up @@ -57,6 +58,21 @@ std::string TestUtils::GetExecutableDirectory() {
}

std::string TestUtils::GetTestLibraryPath() {
// Strong override: reuse the same env var as the Python tooling
// (`python/scripts/base_app.py`): DBPA_LIBRARY_PATH.
//
// This allows CI/build systems to provide the exact path to the DBPA agent shared
// library, avoiding reliance on executable-path heuristics or current working directory.
const char* explicit_path = std::getenv("DBPA_LIBRARY_PATH");
if (explicit_path && explicit_path[0]) {
std::string p(explicit_path);
if (std::filesystem::exists(p)) {
return p;
}
throw std::runtime_error("DBPA_LIBRARY_PATH is set but the file does not exist: " +
p);
}

// Check for environment variable to override the executable directory
const char* cwd_override = std::getenv("PARQUET_TEST_LIBRARY_CWD");
std::string base_path;
Expand All @@ -83,7 +99,17 @@ std::string TestUtils::GetTestLibraryPath() {
}
}

throw std::runtime_error("Could not find library");
// Provide a detailed error to make CI failures diagnosable.
std::string msg = "Could not find DBPA test agent library. Tried:\n";
for (const auto& filename : possible_filenames) {
for (const auto& directory : possible_directories) {
msg += " - " + (directory + filename) + "\n";
}
}
msg += "PARQUET_TEST_LIBRARY_CWD=";
msg += (cwd_override && cwd_override[0]) ? cwd_override : "<unset>";
msg += "\n";
throw std::runtime_error(msg);
}

} // namespace parquet::encryption::external::test
39 changes: 37 additions & 2 deletions cpp/src/parquet/encryption/external_dbpa_encryption.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,17 @@ std::unique_ptr<dbps::external::DataBatchProtectionAgentInterface> LoadAndInitia
// Step 1: Get path to the shared library
auto it = configuration_properties.find(SHARED_LIBRARY_PATH_KEY);
if (it == configuration_properties.end()) {
const auto msg = "Required configuration key '" + SHARED_LIBRARY_PATH_KEY +
"' not found in configuration_properties";
std::string msg = "Required configuration key '" + SHARED_LIBRARY_PATH_KEY +
"' not found in configuration_properties. Present keys: ";
bool first = true;
for (const auto& kv : configuration_properties) {
if (!first) msg += ", ";
first = false;
msg += kv.first;
}
if (first) {
msg += "<none>";
}
ARROW_LOG(ERROR) << msg;
throw ParquetException(msg);
}
Expand Down Expand Up @@ -430,6 +439,19 @@ ExternalDBPAEncryptorAdapter* ExternalDBPAEncryptorAdapterFactory::GetEncryptor(
auto app_context = external_file_encryption_properties->app_context();
auto connection_config_for_algorithm = configuration_properties.at(algorithm);

if (::arrow::util::ArrowLog::IsLevelEnabled(
::arrow::util::ArrowLogLevel::ARROW_DEBUG)) {
ARROW_LOG(DEBUG) << "ExternalDBPAEncryptorAdapterFactory::GetEncryptor - "
"selected configuration_properties for EXTERNAL_DBPA_V1:";
if (connection_config_for_algorithm.empty()) {
ARROW_LOG(DEBUG) << " <empty map>";
} else {
for (const auto& [k, v] : connection_config_for_algorithm) {
ARROW_LOG(DEBUG) << " [" << k << "]: [" << v << "]";
}
}
}

std::string key_id;
try {
auto key_metadata =
Expand Down Expand Up @@ -659,6 +681,19 @@ std::unique_ptr<DecryptorInterface> ExternalDBPADecryptorAdapterFactory::GetDecr
auto connection_config_for_algorithm = configuration_properties.at(algorithm);
auto key_value_metadata = column_chunk_metadata->key_value_metadata();

if (::arrow::util::ArrowLog::IsLevelEnabled(
::arrow::util::ArrowLogLevel::ARROW_DEBUG)) {
ARROW_LOG(DEBUG) << "ExternalDBPADecryptorAdapterFactory::GetDecryptor - "
"selected configuration_properties for EXTERNAL_DBPA_V1:";
if (connection_config_for_algorithm.empty()) {
ARROW_LOG(DEBUG) << " <empty map>";
} else {
for (const auto& [k, v] : connection_config_for_algorithm) {
ARROW_LOG(DEBUG) << " [" << k << "]: [" << v << "]";
}
}
}

std::string key_id;
try {
auto key_metadata = KeyMetadata::Parse(crypto_metadata->key_metadata());
Expand Down
123 changes: 116 additions & 7 deletions cpp/src/parquet/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ if not thrift_dep.found()
{
'BUILD_COMPILER': 'OFF',
'BUILD_EXAMPLES': 'OFF',
'BUILD_TESTING': 'OFF',
'BUILD_TESTS': 'OFF',
'BUILD_TUTORIALS': 'OFF',
'CMAKE_UNITY_BUILD': 'OFF',
'WITH_AS3': 'OFF',
Expand All @@ -89,6 +91,10 @@ endif

parquet_deps = [arrow_dep, rapidjson_dep, thrift_dep]

# Default to no DBPA test-agent library (only built when encryption+testing are enabled).
# NOTE(review): per Meson's disabler semantics, a function call that receives a
# disabler as an argument is itself skipped, so any consumer of
# dbpa_test_agent_lib has to handle the "agent not built" case explicitly.
# dbpa_test_agent_path stays '' until the agent library is actually defined.
dbpa_test_agent_lib = disabler()
dbpa_test_agent_path = ''

if needs_parquet_encryption or get_option('parquet_require_encryption').auto()
openssl_dep = dependency('openssl', required: needs_parquet_encryption)
else
Expand All @@ -99,8 +105,15 @@ if openssl_dep.found()
parquet_deps += openssl_dep

parquet_srcs += files(
'encryption/aes_encryption.cc',
'encryption/crypto_factory.cc',
'encryption/encryption_internal.cc',
'encryption/encoding_properties.cc',
'encryption/encryption_utils.cc',
'encryption/external/dbpa_enum_utils.cc',
'encryption/external/dbpa_executor.cc',
'encryption/external/dbpa_library_wrapper.cc',
'encryption/external/loadable_encryptor_utils.cc',
'encryption/external_dbpa_encryption.cc',
'encryption/file_key_unwrapper.cc',
'encryption/file_key_wrapper.cc',
'encryption/file_system_key_material_store.cc',
Expand All @@ -112,18 +125,87 @@ if openssl_dep.found()
'encryption/local_wrap_kms_client.cc',
'encryption/openssl_internal.cc',
)

# External DBPA integration and its header-only deps are only relevant when
# encryption support is enabled (i.e., OpenSSL is available).
if needs_parquet_encryption or get_option('parquet_require_encryption').auto()
tcb_span_dep = dependency('tcb_span', fallback: ['tcb-span', 'tcb_span_dep'])
magic_enum_dep = dependency(
'magic_enum_header_only',
fallback: ['magic-enum', 'magic_enum_dep'],
)

# DBPS interface is header-only (dbpa_interface.h and friends).
#
# IMPORTANT: Do not use Meson's CMake interpreter for DBPS here. DBPS' CMake
# target graph (CTest/CDash targets, generator expressions, etc.) has proven
# incompatible with Meson's CMake dependency extractor and can fail Meson
# configuration with "Cycle in CMake inputs/dependencies detected".
#
# Instead we use a tiny Meson wrapper (cpp/subprojects/packagefiles/dbps_agent)
# that only exposes the header-only interface dependency. Meson does not build
# the DBPS shared libraries itself; build them separately with DBPS' own CMake.
dbps_sp = subproject('dbps_agent')
dbps_interface_dep = dbps_sp.get_variable('dbps_interface_dep')

parquet_deps += [dbps_interface_dep, tcb_span_dep, magic_enum_dep]

if get_option('parquet_build_dbps_libs').enabled()
warning(
'Meson does not build DBPS shared libraries (parquet_build_dbps_libs is a no-op). ' +
'Provide your own agent shared library and set configuration_properties["agent_library_path"], ' +
'or build DBPS via its CMake build separately.',
)
endif

# Build the in-tree DBPA test agent shared library used by external encryption tests.
# CMake builds this as `DBPATestAgent` when ARROW_TESTING is enabled; Meson needs an
# equivalent target so tests can dlopen() `libDBPATestAgent.so`.
#
# Keep it Meson-native (no CMake), and place the output next to parquet test
# executables so `TestUtils::GetTestLibraryPath()` can find it via the executable dir.
if needs_testing
dbpa_test_agent_lib = shared_library(
'DBPATestAgent',
sources: files('encryption/external/dbpa_test_agent.cc'),
include_directories: include_directories('..'),
# Keep this test agent as self-contained as possible. It is dlopen()'d
# by tests, so avoid unnecessary runtime dependencies (e.g., libarrow.so)
# which can differ between Meson/CMake CI environments.
dependencies: [magic_enum_dep, tcb_span_dep, dbps_interface_dep],
install: false,
gnu_symbol_visibility: 'default',
)
dbpa_test_agent_path = dbpa_test_agent_lib.full_path()
endif
endif
else
parquet_srcs += files('encryption/encryption_internal_nossl.cc')
parquet_srcs += files('encryption/aes_encryption_nossl.cc')
endif


# Parquet's CMake build uses explicit export macros and (on ELF) a version script
# to control symbol visibility. Meson doesn't currently replicate that machinery.
# With Meson's default hidden visibility, some non-exported-but-test-used symbols
# (e.g. EncodingProperties, IsParquetCipherSupported) are not linkable from test
# executables. When building tests/benchmarks, relax visibility to avoid link
# failures in Meson CI.
# Export all symbols when tests are built; otherwise keep inlines hidden.
parquet_symbol_visibility = needs_testing ? 'default' : 'inlineshidden'

parquet_lib = library(
'arrow-parquet',
sources: parquet_srcs,
dependencies: parquet_deps,
gnu_symbol_visibility: 'inlineshidden',
gnu_symbol_visibility: parquet_symbol_visibility,
)

parquet_dep = declare_dependency(link_with: parquet_lib)
parquet_dep = declare_dependency(
link_with: parquet_lib,
dependencies: parquet_deps,
)

subdir('api')
subdir('arrow')
Expand Down Expand Up @@ -216,6 +298,7 @@ parquet_tests = {
'writer-test': {
'sources': files(
'column_writer_test.cc',
'encryption/external/test_utils.cc',
'file_serialize_test.cc',
'stream_writer_test.cc',
),
Expand All @@ -226,6 +309,7 @@ parquet_tests = {
'arrow/arrow_reader_writer_test.cc',
'arrow/arrow_statistics_test.cc',
'arrow/variant_test.cc',
'encryption/external/test_utils.cc',
),
},
'arrow-internals-test': {
Expand All @@ -240,18 +324,26 @@ parquet_tests = {
'arrow/arrow_schema_test.cc',
),
},
'file_deserialize_test': {'sources': files('file_deserialize_test.cc')},
'file_deserialize_test': {
'sources': files('file_deserialize_test.cc', 'encryption/external/test_utils.cc'),
},
'schema_test': {'sources': files('schema_test.cc')},
}

if needs_parquet_encryption
parquet_tests += {
'encryption-test': {
'sources': files(
'encryption/encryption_internal_test.cc',
'encryption/aes_encryption_test.cc',
'encryption/crypto_factory_test.cc',
'encryption/encoding_properties_test.cc',
'encryption/external/test_utils.cc',
'encryption/external_dbpa_encryption_test.cc',
'encryption/per_column_encryption_test.cc',
'encryption/properties_test.cc',
'encryption/read_configurations_test.cc',
'encryption/test_encryption_util.cc',
'encryption/test_in_memory_kms.cc',
'encryption/write_configurations_test.cc',
),
},
Expand Down Expand Up @@ -297,7 +389,24 @@ foreach key, val : parquet_tests
sources: val['sources'] + files('test_util.cc'),
dependencies: parquet_test_dep,
)
test(test_name, exc)
# Ensure the DBPA test agent is built before running any tests that may load it.
#
# dbpa_test_agent_lib defaults to disabler(), and Meson skips every function
# call that receives a disabler argument. Passing it to test() unconditionally
# would therefore silently drop all Parquet tests from builds where the agent
# is not built, so only add it to `depends` when it really exists (its path is
# only set together with the library target).
dbpa_test_depends = []
if dbpa_test_agent_path != ''
    dbpa_test_depends += dbpa_test_agent_lib
endif
test(
    test_name,
    exc,
    depends: dbpa_test_depends,
    env: {
        # Make DBPATestAgent lookup deterministic under Meson. Some CI setups may
        # not allow /proc/self/exe resolution or run tests with unexpected cwd.
        'PARQUET_TEST_LIBRARY_CWD': meson.current_build_dir(),
        # Reuse the standard env var used by Python tooling (`base_app.py`).
        # Empty when the agent is not built; the C++ lookup ignores an empty value.
        'DBPA_LIBRARY_PATH': dbpa_test_agent_path,
        # Make DBPA-related code emit useful logs in CI (opt-in via env).
        # This helps debug why Meson runs see unexpected configuration_properties.
        'PARQUET_DBPA_LOG_LEVEL': 'DEBUG',
    },
)
endforeach

parquet_benchmarks = {
Expand Down
23 changes: 23 additions & 0 deletions cpp/subprojects/dbps_agent.wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Pins the DataBatchProtectionService sources to a single upstream commit
# archive; the `dbps_agent` patch directory is overlaid on the extracted tree.
[wrap-file]
directory = DataBatchProtectionService-6206fb0e27556a0df9160364caa3819e4af3fe0f
source_url = https://github.com/protegrity/DataBatchProtectionService/archive/6206fb0e27556a0df9160364caa3819e4af3fe0f.tar.gz
source_filename = dbps_agent-6206fb0e27556a0df9160364caa3819e4af3fe0f.tar.gz
source_hash = 9c95a1fec0c9851867a776c3241d3feb59b07bd7a50e653d6214e07a8ad62419
patch_directory = dbps_agent
Loading
Loading