diff --git a/.gitignore b/.gitignore index 8354aa8f8164..b1e2d117fd43 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# Basic change to trigger testing PR. + apache-rat-*.jar apache-arrow.tar.gz diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index 91d813530d43..8010a8301b49 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -42,6 +42,8 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/test_in_memory_kms.h" +// Basic change to trigger testing PR. + using arrow::util::SecureString; const SecureString kFooterKeyMasterKey("0123456789012345"); diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 50f3bd9a15e0..e43e3500a9d8 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -59,6 +59,8 @@ // ---------------------------------------------------------------------- // file compatibility stuff +// Basic change to trigger testing PR. + #ifdef _WIN32 # include # include diff --git a/cpp/src/arrow/util/io_util.h b/cpp/src/arrow/util/io_util.h index e9f218b5205b..d8d11afe9b18 100644 --- a/cpp/src/arrow/util/io_util.h +++ b/cpp/src/arrow/util/io_util.h @@ -38,6 +38,8 @@ #include "arrow/util/macros.h" #include "arrow/util/windows_fixup.h" +// Basic change to trigger testing PR. + namespace arrow::internal { // NOTE: 8-bit path strings on Windows are encoded using UTF-8. diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 0ee973f2a2d6..99132c2809d5 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -11,6 +11,8 @@ #include +// Basic change to trigger testing PR. 
+ namespace parquet { namespace format { int _kTypeValues[] = { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 1f1e254f5cf2..172ef347cd2a 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -20,6 +20,8 @@ #include "parquet/windows_compatibility.h" +// Basic change to trigger testing PR. + namespace parquet { namespace format { /** diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index 78e3e2549394..5c93d6b4b369 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -9,6 +9,8 @@ #include "parquet_types.h" +/* Basic change to trigger test PR. */ + namespace parquet { namespace format { template diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index dc7d40d2a386..27c20e6662bd 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# Basic change to trigger testing PR. + add_custom_target(parquet-all) add_custom_target(parquet) add_custom_target(parquet-benchmarks) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index e081b428e24f..efe19e018ed4 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #ifdef _MSC_VER # pragma warning(push) // Disable forcing value to bool warnings diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index d42fdc5034ab..cecc4780f83b 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -75,6 +75,8 @@ using ParquetReader = parquet::ParquetFileReader; using parquet::internal::RecordReader; +// Basic change to trigger testing PR. + namespace bit_util = arrow::bit_util; namespace parquet::arrow { diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 9c314cf81817..05f657600fb5 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -68,6 +68,8 @@ namespace parquet { namespace { +// Basic change to trigger testing PR. + // The minimum number of repetition/definition levels to decode at a time, for // better vectorized performance when doing many smaller record reads constexpr int64_t kMinLevelBatchSize = 1024; diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index ac4469b1904f..f5f77ed7fb28 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -56,6 +56,8 @@ static constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024; // 16 KB is the default expected page header size static constexpr uint32_t kDefaultPageHeaderSize = 16 * 1024; +// Basic change to trigger testing PR. + // \brief DataPageStats stores encoded statistics and number of values/rows for // a page. struct PARQUET_EXPORT DataPageStats { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 1f3d64f6228c..69b680298ea4 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -78,6 +78,8 @@ namespace parquet { namespace { +// Basic change to trigger testing PR. + // Visitor that extracts the value buffer from a FlatArray at a given offset. 
struct ValueBufferSlicer { template diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 990125df4e37..5a26912ab753 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include #include diff --git a/cpp/src/parquet/encryption/CMakeLists.txt b/cpp/src/parquet/encryption/CMakeLists.txt index b4c977fccd18..c20ae7e2c2f1 100644 --- a/cpp/src/parquet/encryption/CMakeLists.txt +++ b/cpp/src/parquet/encryption/CMakeLists.txt @@ -15,5 +15,7 @@ # specific language governing permissions and limitations # under the License. +# Basic change to trigger testing PR. + # Headers: public api arrow_install_all_headers("parquet/encryption") diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index 50b074537884..c5a8fdaf7444 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include "arrow/util/logging.h" diff --git a/cpp/src/parquet/encryption/crypto_factory.h b/cpp/src/parquet/encryption/crypto_factory.h index 3c6a5f29ea88..ed458a0cbbc8 100644 --- a/cpp/src/parquet/encryption/crypto_factory.h +++ b/cpp/src/parquet/encryption/crypto_factory.h @@ -25,6 +25,8 @@ #include "parquet/encryption/kms_client_factory.h" #include "parquet/platform.h" +// Basic change to trigger testing PR. 
+ namespace parquet::encryption { static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm = diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 52c809aa2f3b..d7ebeb39a7f4 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -26,6 +26,8 @@ #include "arrow/util/utf8.h" #include "parquet/encryption/encryption_internal.h" +// Basic change to trigger testing PR. + using ::arrow::util::SecureString; namespace parquet { diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index d822cc3c1845..5971f100f738 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -28,6 +28,8 @@ #include "parquet/schema.h" #include "parquet/types.h" +// Basic change to trigger testing PR. + namespace parquet { static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm = diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 9400fae0adf7..4a9c939d1293 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/encryption/encryption_internal.h" #include diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 062527495659..6bb7989bedb8 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #pragma once #include diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index 2450f8654d6f..edb28fb3344a 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/encryption/encryption_internal.h" #include "parquet/exception.h" diff --git a/cpp/src/parquet/encryption/encryption_internal_test.cc b/cpp/src/parquet/encryption/encryption_internal_test.cc index bf6607e32877..94ec6b474a35 100644 --- a/cpp/src/parquet/encryption/encryption_internal_test.cc +++ b/cpp/src/parquet/encryption/encryption_internal_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include "parquet/encryption/encryption_internal.h" diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index fd870ed1f3bb..a187a2dcc611 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -22,6 +22,8 @@ #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/exception.h" +// Basic change to trigger testing PR. + using ::arrow::util::SecureString; namespace parquet::encryption { diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index b90d31585597..608009f0df62 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #include "parquet/encryption/internal_file_decryptor.h" #include "arrow/util/logging.h" diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index a365b4df4bf9..bc3509205420 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #pragma once #include diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 3623aa05c662..4d005b4c9dea 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/encryption/internal_file_encryptor.h" #include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index ee15fe32de96..4dc947a95dd6 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #pragma once #include diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 60a8a52206c3..99ff5bb8bb49 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #include "arrow/util/base64.h" #include "arrow/util/secure_string.h" diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 1ceda7ac032f..60b6b82584e3 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 15c1e7df7365..36578c499b4c 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc index ae86b51242a1..df9c775579cc 100644 --- a/cpp/src/parquet/encryption/write_configurations_test.cc +++ b/cpp/src/parquet/encryption/write_configurations_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include diff --git a/cpp/src/parquet/file_deserialize_test.cc b/cpp/src/parquet/file_deserialize_test.cc index 7fa5e2f167e2..3c4b7fa4bd10 100644 --- a/cpp/src/parquet/file_deserialize_test.cc +++ b/cpp/src/parquet/file_deserialize_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #include #include diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index b246feaf732b..1b607cefd188 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/file_reader.h" #include diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index 8c19aecb0df4..6e066abd3f39 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/file_writer.h" #include diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 4b1822c0dae1..93850359600b 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "parquet/metadata.h" #include diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 3380adbf56aa..3a0e6d478731 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #pragma once #include diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index 5c2e78c68db5..bd1c6bc52461 100644 --- a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #include "parquet/page_index.h" #include "parquet/encoding.h" #include "parquet/encryption/encryption_internal.h" diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index e3cc5adb9648..1296760142ae 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -17,6 +17,8 @@ * under the License. */ + /** Basic change to trigger test PR */ + /** * File format description for the parquet file format */ diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 8f82adae928d..cef778f571ff 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #pragma once #include "parquet/windows_compatibility.h" diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index f545f5122018..7e6693995652 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include #include #include diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 7e8a18fc94d6..8be4b9b95a9b 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #pragma once #include diff --git a/cpp/src/parquet/types_test.cc b/cpp/src/parquet/types_test.cc index 6c77662d58f7..91472e412b48 100644 --- a/cpp/src/parquet/types_test.cc +++ b/cpp/src/parquet/types_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. 
+ #include #include diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 94365f0f7c76..1b6f6bf1f283 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# Basic change to trigger testing PR. + # distutils: language = c++ # cython: language_level = 3 diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index d59c70a27447..b0582a3c4a77 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -18,6 +18,8 @@ # cython: profile=False # distutils: language = c++ +# Basic change to trigger testing PR. + from collections.abc import Sequence from textwrap import indent import warnings diff --git a/python/pyarrow/_parquet_encryption.pxd b/python/pyarrow/_parquet_encryption.pxd index d52669501a40..2c83c9f34087 100644 --- a/python/pyarrow/_parquet_encryption.pxd +++ b/python/pyarrow/_parquet_encryption.pxd @@ -18,6 +18,8 @@ # distutils: language = c++ # cython: language_level = 3 +# Basic change to trigger testing PR. + from pyarrow.includes.common cimport * from pyarrow.includes.libparquet_encryption cimport * from pyarrow._parquet cimport (ParquetCipher, diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index f95464e3031d..704008ed4acd 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -18,6 +18,8 @@ # cython: profile=False # distutils: language = c++ +# Basic change to trigger testing PR. 
+ from datetime import timedelta from cpython.bytes cimport PyBytes_FromStringAndSize diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index 7e031925af60..2192a325d459 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -17,6 +17,8 @@ # distutils: language = c++ +# Basic change to trigger testing PR. + from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport CSecureString from pyarrow._parquet cimport (ParquetCipher, diff --git a/python/pyarrow/parquet/encryption.py b/python/pyarrow/parquet/encryption.py index df6eed913fa5..dad71c92a110 100644 --- a/python/pyarrow/parquet/encryption.py +++ b/python/pyarrow/parquet/encryption.py @@ -16,6 +16,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + +# Basic change to trigger testing PR. from pyarrow._parquet_encryption import (CryptoFactory, # noqa EncryptionConfiguration, DecryptionConfiguration, diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/arrow/python/parquet_encryption.cc index 1016cdd3a375..4d764363e246 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/arrow/python/parquet_encryption.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// Basic change to trigger testing PR. + #include "arrow/python/parquet_encryption.h" #include "parquet/exception.h" diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 3e57a7619454..a8c84641ca1b 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+// Basic change to trigger testing PR.
+
 #pragma once

 #include
diff --git a/python/scripts/base_app.py b/python/scripts/base_app.py
new file mode 100644
index 000000000000..080b3c709cf7
--- /dev/null
+++ b/python/scripts/base_app.py
@@ -0,0 +1,171 @@
+"""
+base_app.py
+
+Sample script: writes a small table to an encrypted Parquet file, then reads
+it back and prints its metadata and contents.
+
+@author sbrenes
+"""
+
+import base64
+import datetime
+import pyarrow
+import pyarrow.parquet as pp
+import pyarrow.parquet.encryption as ppe
+
+
+class FooKmsClient(ppe.KmsClient):
+    """Toy in-memory KMS client for this sample; it does no real encryption.
+
+    A "wrapped" key is just base64(master key bytes + data key bytes).
+    """
+
+    def __init__(self, kms_connection_config):
+        ppe.KmsClient.__init__(self)
+        # Maps master key identifier -> master key string.
+        self.master_keys_map = kms_connection_config.custom_kms_conf
+
+    def wrap_key(self, key_bytes, master_key_identifier):
+        """Return base64(master key bytes + key_bytes)."""
+        master_key_bytes = self.master_keys_map[master_key_identifier].encode('utf-8')
+        joint_key = b"".join([master_key_bytes, key_bytes])
+        return base64.b64encode(joint_key)
+
+    def unwrap_key(self, wrapped_key, master_key_identifier):
+        """Return the data key stored in wrapped_key.
+
+        Raises ValueError if wrapped_key does not start with the master key
+        registered under master_key_identifier.
+        """
+        expected_master = self.master_keys_map[master_key_identifier].encode('utf-8')
+        decoded_key = base64.b64decode(wrapped_key)
+        # wrap_key prepends the whole master key, so split at its real length
+        # rather than assuming 16 bytes.
+        prefix_length = len(expected_master)
+        if decoded_key[:prefix_length] == expected_master:
+            return decoded_key[prefix_length:]
+        # Keep key material out of the error message.
+        raise ValueError(f"Bad master key used for [{master_key_identifier}]")
+
+
+def kms_client_factory(kms_connection_config):
+    """Build the KMS client used by ppe.CryptoFactory."""
+    return FooKmsClient(kms_connection_config)
+
+
+def write_parquet(table, location, encryption_config=None):
+    """Write table to location, encrypted when encryption_config is given."""
+    encryption_properties = None
+
+    if encryption_config:
+        crypto_factory = ppe.CryptoFactory(kms_client_factory)
+        encryption_properties = crypto_factory.file_encryption_properties(
+            get_kms_connection_config(), encryption_config)
+
+    # Close the writer explicitly instead of relying on garbage collection.
+    with pp.ParquetWriter(location, table.schema,
+                          encryption_properties=encryption_properties) as writer:
+        writer.write_table(table)
+
+
+def encrypted_data_and_footer_sample(data_table):
+    """Write data_table to sample.parquet using get_encryption_config()."""
+    parquet_path = "sample.parquet"
+    encryption_config = get_encryption_config()
+    write_parquet(data_table, parquet_path, encryption_config=encryption_config)
+    print(f"Written to [{parquet_path}]")
+
+
+def create_and_encrypt_parquet():
+    """Build the sample table and write it out encrypted."""
+    sample_data = {
+        "orderId": [1001, 1002, 1003],
+        "productId": [152, 268, 6548],
+        "price": [3.25, 6.48, 2.12],
+        "vat": [0.0, 0.2, 0.05],
+        "customer_name": ["Alice", "Bob", "Charlotte"]
+    }
+    data_table = pyarrow.Table.from_pydict(sample_data)
+
+    print("\nWriting parquet.")
+
+    encrypted_data_and_footer_sample(data_table)
+
+
+def read_and_print_parquet():
+    """Read sample.parquet back and print its metadata and first rows."""
+    print("\n-----------------------------------------------\nNow reading parquet file")
+    parquet_path = "sample.parquet"
+
+    # No decryption properties are passed here.
+    # NOTE(review): presumably this relies on plaintext_footer=True - confirm.
+    metadata = pp.read_metadata(parquet_path)
+    print("\nMetadata:")
+    print(metadata)
+    print("\n")
+
+    decryption_config = get_decryption_config()
+    read_data_table = read_parquet(parquet_path, decryption_config=decryption_config)
+    data_frame = read_data_table.to_pandas()
+    print("\nDecrypted data:")
+    print(data_frame.head())
+    print("\n")
+
+
+def read_parquet(location, decryption_config=None, read_metadata=False):
+    """Read location, decrypting when decryption_config is given.
+
+    Returns the file metadata when read_metadata is true, otherwise the table.
+    """
+    decryption_properties = None
+
+    if decryption_config:
+        crypto_factory = ppe.CryptoFactory(kms_client_factory)
+        decryption_properties = crypto_factory.file_decryption_properties(
+            get_kms_connection_config(), decryption_config)
+
+    if read_metadata:
+        metadata = pp.read_metadata(location, decryption_properties=decryption_properties)
+        return metadata
+
+    data_table = pp.ParquetFile(location, decryption_properties=decryption_properties).read()
+    return data_table
+
+
+def get_kms_connection_config():
+    """Return the KMS connection config holding the sample master keys."""
+    return ppe.KmsConnectionConfig(
+        custom_kms_conf={
+            "footer_key": "012footer_secret",
+            "orderid_key": "column_secret001",
+            "productid_key": "column_secret002",
+            "price_key": "column_secret003",
+            "customer_key": "column_secret004"
+        }
+    )
+
+
+def get_encryption_config(plaintext_footer=True):
+    """Return the encryption config: orderId and productId are encrypted."""
+    return ppe.EncryptionConfiguration(
+        footer_key="footer_key",
+        column_keys={
+            "orderid_key": ["orderId"],
+            "productid_key": ["productId"]
+        },
+        encryption_algorithm="AES_GCM_V1",
+        cache_lifetime=datetime.timedelta(minutes=2.0),
+        data_key_length_bits=128,
+        plaintext_footer=plaintext_footer
+    )
+
+
+def get_decryption_config():
+    """Return the decryption config matching get_encryption_config()."""
+    return ppe.DecryptionConfiguration(cache_lifetime=datetime.timedelta(minutes=2.0))
+
+
+if __name__ == "__main__":
+    create_and_encrypt_parquet()
+    read_and_print_parquet()
+    print("\nPlayground finished!\n")