diff --git a/cpp/src/parquet/encryption/crypto_factory.h b/cpp/src/parquet/encryption/crypto_factory.h index d0b24a70fbb5..19a14a94c0b7 100644 --- a/cpp/src/parquet/encryption/crypto_factory.h +++ b/cpp/src/parquet/encryption/crypto_factory.h @@ -18,12 +18,14 @@ #pragma once #include +#include #include "parquet/encryption/encryption.h" #include "parquet/encryption/file_key_wrapper.h" #include "parquet/encryption/key_toolkit.h" #include "parquet/encryption/kms_client_factory.h" #include "parquet/platform.h" +#include "parquet/types.h" namespace parquet::encryption { @@ -96,7 +98,7 @@ struct PARQUET_EXPORT ColumnEncryptionAttributes { std::string key_id; }; -/// Encryption Configuration for use with External Encryption services. +/// Encryption Configuration for use with external encryptors. /// Extends the already existing EncryptionConfiguration with more context and with /// the capability of specifying encryption algorithm per column. struct PARQUET_EXPORT ExternalEncryptionConfiguration : public EncryptionConfiguration { @@ -118,21 +120,21 @@ struct PARQUET_EXPORT ExternalEncryptionConfiguration : public EncryptionConfigu /// If a column name appears in both, an exception will be thrown. std::unordered_map per_column_encryption; - /// External encryption services may use additional context provided by the application to - /// enforce robust access control. The values sent to the external service depend on each + /// External encryptors may use additional context provided by the application to + /// enforce robust access control. The values sent to the external encryptor depend on each /// implementation. /// This value must be a valid JSON-formatted string. - /// Validation of the string will be done by the external encryption service, Arrow will only + /// Validation of the string will be done by the external encryptor; Arrow will only /// forward this value. 
/// Format: "{\"user_id\": \"abc123\", \"location\": {\"lat\": 9.7489, \"lon\": -83.7534}}" std::string app_context; /// Key/value map of the location of configuration files needed by the external - /// encryption service. This may include location of a dynamically-linked library, or the - /// location of a file where the external service can find urls, certificates, and parameters - /// needed to make a remote service call. + /// encryptors. This may include the location of a dynamically-linked library, or the + /// location of a file where the external encryptor can find URLs, certificates, and parameters + /// needed to make a remote call. /// For security, these values should never be sent in this config, only the locations of - /// the files that the external service will know how to access. + /// the files that the external encryptor will know how to access. std::unordered_map connection_config; }; @@ -143,6 +145,26 @@ struct PARQUET_EXPORT DecryptionConfiguration { double cache_lifetime_seconds = kDefaultCacheLifetimeSeconds; }; +struct PARQUET_EXPORT ExternalDecryptionConfiguration : public DecryptionConfiguration { + /// External decryptors may use additional context provided by the application to + /// enforce robust access control. The values sent to the external decryptor depend on each + /// implementation. + /// This value must be a valid JSON-formatted string. + /// Validation of the string will be done by the external decryptor; Arrow will only + /// forward this value. + /// Format: "{\"user_id\": \"abc123\", \"location\": {\"lat\": 9.7489, \"lon\": -83.7534}}" + std::string app_context; + + /// Map from each encryption algorithm to the key/value map of configuration file locations + /// needed by the external decryptors. This may include the location of a dynamically-linked + /// library, or the location of a file where the external decryptor can find URLs, certificates, + /// and parameters needed to make a remote call. 
+ /// For security, these values should never be sent in this config, only the locations of + /// the files that the external decryptor will know how to access. + std::unordered_map> + connection_config; +}; + /// This is a core class, that translates the parameters of high level encryption (like /// the names of encrypted columns, names of master keys, etc), into parameters of low /// level encryption (like the key metadata, DEK, etc). A factory that produces the low @@ -162,8 +184,8 @@ class PARQUET_EXPORT CryptoFactory { const EncryptionConfiguration& encryption_config, const std::string& file_path = "", const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR); - /// Get the external encryption properties for a Parquet file. Used when encryption - /// will be provided by an external service. + /// Get the external encryption properties for a Parquet file. Used when an external encryptor + /// will be used to encrypt the file. std::shared_ptr GetExternalFileEncryptionProperties( const KmsConnectionConfig& kms_connection_config, const ExternalEncryptionConfiguration& external_encryption_config, diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 477d8e6f5335..a1cd009cfca7 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -136,6 +136,58 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix return this; } +ExternalFileDecryptionProperties::Builder* ExternalFileDecryptionProperties::Builder::app_context( + const std::string& context) { + if (!app_context_.empty()) { + throw ParquetException("App context already set"); + } + + if (context.empty()) { + return this; + } + + app_context_ = context; + return this; +} + +ExternalFileDecryptionProperties::Builder* +ExternalFileDecryptionProperties::Builder::connection_config( + const std::map>& config) { + if (connection_config_.size() != 0) { + throw 
ParquetException("Connection config already set"); + } + + if (config.size() == 0) { + return this; + } + + connection_config_ = config; + return this; +} + +std::shared_ptr +ExternalFileDecryptionProperties::Builder::build_external() { + return std::shared_ptr(new ExternalFileDecryptionProperties( + footer_key_, key_retriever_, check_plaintext_footer_integrity_, aad_prefix_, + aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_, + app_context_, connection_config_)); +} + +ExternalFileDecryptionProperties::ExternalFileDecryptionProperties( + const std::string& footer_key, + std::shared_ptr key_retriever, + bool check_plaintext_footer_integrity, const std::string& aad_prefix, + std::shared_ptr aad_prefix_verifier, + const ColumnPathToDecryptionPropertiesMap& column_decryption_properties, + bool plaintext_files_allowed, + const std::string& app_context, + const std::map>& connection_config) + : FileDecryptionProperties(footer_key, key_retriever, check_plaintext_footer_integrity, + aad_prefix, aad_prefix_verifier, column_decryption_properties, + plaintext_files_allowed), + app_context_(app_context), + connection_config_(connection_config) {} + ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( const std::string& key) { if (key.empty()) return this; @@ -145,9 +197,15 @@ ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( return this; } +ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::parquet_cipher( + ParquetCipher::type parquet_cipher) { + parquet_cipher_ = parquet_cipher; + return this; +} + std::shared_ptr ColumnDecryptionProperties::Builder::build() { return std::shared_ptr( - new ColumnDecryptionProperties(column_path_, key_)); + new ColumnDecryptionProperties(column_path_, key_, parquet_cipher_)); } FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_metadata( @@ -211,9 +269,10 @@ 
ColumnEncryptionProperties::ColumnEncryptionProperties( key_ = key; } -ColumnDecryptionProperties::ColumnDecryptionProperties(const std::string& column_path, - const std::string& key) - : column_path_(column_path) { +ColumnDecryptionProperties::ColumnDecryptionProperties( + const std::string& column_path, const std::string& key, + std::optional parquet_cipher) + : column_path_(column_path), parquet_cipher_(parquet_cipher) { DCHECK(!column_path.empty()); if (!key.empty()) { diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 99888124ad20..63e29972befd 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -127,8 +127,9 @@ class PARQUET_EXPORT ColumnEncryptionProperties { Builder* key_id(const std::string& key_id); /// Set ParquetCipher type to use. - /// This field is declared as optional. If the value is not set, then the ParquetCipher - /// declared in the FileEncryptionProperties will be used. + /// This field is optional; it is present when per-column encryption was used. If the + /// value is not set, then the ParquetCipher declared in the FileEncryptionProperties will be + /// used. Builder* parquet_cipher(ParquetCipher::type parquet_cipher); std::shared_ptr build() { @@ -189,11 +190,17 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// key length must be either 16, 24 or 32 bytes. Builder* key(const std::string& key); + /// Set ParquetCipher type to use. + /// This field is optional; it is present when per-column encryption was used. If the + /// value is not set, then the ParquetCipher declared in the InternalFileDecryptor will be used. 
+ Builder* parquet_cipher(ParquetCipher::type parquet_cipher); + std::shared_ptr build(); private: const std::string column_path_; std::string key_; + std::optional parquet_cipher_; }; ColumnDecryptionProperties() = default; @@ -205,15 +212,20 @@ class PARQUET_EXPORT ColumnDecryptionProperties { std::string column_path() const { return column_path_; } std::string key() const { return key_; } + /// Check whether the optional has a value before using. + std::optional parquet_cipher() const { return parquet_cipher_; } + private: const std::string column_path_; std::string key_; + std::optional parquet_cipher_; /// This class is only required for setting explicit column decryption keys - /// to override key retriever (or to provide keys when key metadata and/or /// key retriever are not available) explicit ColumnDecryptionProperties(const std::string& column_path, - const std::string& key); + const std::string& key, + std::optional parquet_cipher); }; class PARQUET_EXPORT AADPrefixVerifier { @@ -304,7 +316,7 @@ class PARQUET_EXPORT FileDecryptionProperties { aad_prefix_verifier_, column_decryption_properties_, plaintext_files_allowed_)); } - private: + protected: std::string footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; @@ -349,6 +361,7 @@ class PARQUET_EXPORT FileDecryptionProperties { bool check_plaintext_footer_integrity_; bool plaintext_files_allowed_; + protected: FileDecryptionProperties( const std::string& footer_key, std::shared_ptr key_retriever, @@ -358,6 +371,47 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed); }; +class PARQUET_EXPORT ExternalFileDecryptionProperties : public FileDecryptionProperties { + public: + class PARQUET_EXPORT Builder : public FileDecryptionProperties::Builder { + public: + Builder() : FileDecryptionProperties::Builder() {} + + Builder* app_context(const std::string& context); + + Builder* connection_config( + const std::map>& config); + + std::shared_ptr 
build_external(); + + private: + std::string app_context_; + std::map> connection_config_; + }; + + const std::string& app_context() const { + return app_context_; + } + + const std::map>& connection_config() const { + return connection_config_; + } + + private: + std::string app_context_; + std::map> connection_config_; + + ExternalFileDecryptionProperties( + const std::string& footer_key, + std::shared_ptr key_retriever, + bool check_plaintext_footer_integrity, const std::string& aad_prefix, + std::shared_ptr aad_prefix_verifier, + const ColumnPathToDecryptionPropertiesMap& column_decryption_properties, + bool plaintext_files_allowed, + const std::string& app_context, + const std::map>& connection_config); +}; + class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 7dbe963d1e49..7bbced067192 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -68,6 +68,26 @@ TEST(TestColumnEncryptionProperties, ColumnParquetCipherSpecified) { ASSERT_EQ(ParquetCipher::AES_GCM_CTR_V1, properties->parquet_cipher().value()); } +TEST(TestColumnDecryptionProperties, ColumnParquetCipherNotSpecified) { + std::string column_path = "column_path"; + ColumnDecryptionProperties::Builder column_builder(column_path); + std::shared_ptr properties = column_builder.build(); + + ASSERT_EQ(column_path, properties->column_path()); + ASSERT_EQ(false, properties->parquet_cipher().has_value()); +} + +TEST(TestColumnDecryptionProperties, ColumnParquetCipherSpecified) { + std::string column_path = "column_path"; + ColumnDecryptionProperties::Builder column_builder(column_path); + column_builder.parquet_cipher(ParquetCipher::AES_GCM_CTR_V1); + std::shared_ptr properties = column_builder.build(); + + ASSERT_EQ(column_path, properties->column_path()); + ASSERT_EQ(true, 
properties->parquet_cipher().has_value()); + ASSERT_EQ(ParquetCipher::AES_GCM_CTR_V1, properties->parquet_cipher().value()); +} + // Encrypt all columns and the footer with the same key. // (uniform encryption) TEST(TestEncryptionProperties, UniformEncryption) {