2828
2929namespace parquet ::encryption {
3030
31- void CryptoFactory::RegisterKmsClientFactory (
32- std::shared_ptr<KmsClientFactory> kms_client_factory) {
33- key_toolkit_->RegisterKmsClientFactory (std::move (kms_client_factory));
34- }
31+ // / Extracting functionality common to both GetFileEncryptionProperties and
32+ // / GetExternalFileEncryptionProperties here for reuse.
33+ namespace {
3534
36- std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties (
37- const KmsConnectionConfig& kms_connection_config,
38- const EncryptionConfiguration& encryption_config, const std::string& file_path,
39- const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
40- if (!encryption_config.uniform_encryption && encryption_config.column_keys .empty ()) {
41- throw ParquetException (" Either column_keys or uniform_encryption must be set" );
42- } else if (encryption_config.uniform_encryption &&
43- !encryption_config.column_keys .empty ()) {
44- throw ParquetException (" Cannot set both column_keys and uniform_encryption" );
45- }
46- const std::string& footer_key_id = encryption_config.footer_key ;
47- const std::string& column_key_str = encryption_config.column_keys ;
35+ // Struct to simplify the returned objects in GetFileKeyUtils.
36+ struct FileKeyUtils {
37+ std::shared_ptr<FileKeyMaterialStore> key_material_store;
38+ FileKeyWrapper key_wrapper;
39+ };
4840
41+ FileKeyUtils GetFileKeyUtils (
42+ const std::shared_ptr<KeyToolkit>& key_toolkit,
43+ const KmsConnectionConfig& kms_connection_config,
44+ const EncryptionConfiguration& encryption_config,
45+ const std::string& file_path, const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
4946 std::shared_ptr<FileKeyMaterialStore> key_material_store = nullptr ;
5047 if (!encryption_config.internal_key_material ) {
5148 try {
@@ -58,18 +55,46 @@ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProper
5855 }
5956 }
6057
61- FileKeyWrapper key_wrapper (key_toolkit_ .get (), kms_connection_config,
58+ FileKeyWrapper key_wrapper (key_toolkit .get (), kms_connection_config,
6259 key_material_store, encryption_config.cache_lifetime_seconds ,
6360 encryption_config.double_wrapping );
6461
65- int32_t dek_length_bits = encryption_config.data_key_length_bits ;
62+ return {key_material_store, std::move (key_wrapper)};
63+ }
64+
65+ int ValidateAndGetKeyLength (int32_t dek_length_bits) {
6666 if (!internal::ValidateKeyLength (dek_length_bits)) {
6767 std::ostringstream ss;
6868 ss << " Wrong data key length : " << dek_length_bits;
6969 throw ParquetException (ss.str ());
7070 }
71+ return dek_length_bits / 8 ;
72+ }
7173
72- int dek_length = dek_length_bits / 8 ;
74+ } // Anonymous namespace
75+
76+ void CryptoFactory::RegisterKmsClientFactory (
77+ std::shared_ptr<KmsClientFactory> kms_client_factory) {
78+ key_toolkit_->RegisterKmsClientFactory (std::move (kms_client_factory));
79+ }
80+
81+ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties (
82+ const KmsConnectionConfig& kms_connection_config,
83+ const EncryptionConfiguration& encryption_config, const std::string& file_path,
84+ const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
85+ if (!encryption_config.uniform_encryption && encryption_config.column_keys .empty ()) {
86+ throw ParquetException (" Either column_keys or uniform_encryption must be set" );
87+ } else if (encryption_config.uniform_encryption &&
88+ !encryption_config.column_keys .empty ()) {
89+ throw ParquetException (" Cannot set both column_keys and uniform_encryption" );
90+ }
91+ const std::string& footer_key_id = encryption_config.footer_key ;
92+ const std::string& column_key_str = encryption_config.column_keys ;
93+
94+ auto [key_material_store, key_wrapper] = GetFileKeyUtils (
95+ key_toolkit_, kms_connection_config, encryption_config, file_path, file_system);
96+
97+ int dek_length = ValidateAndGetKeyLength (encryption_config.data_key_length_bits );
7398
7499 std::string footer_key (dek_length, ' \0 ' );
75100 RandBytes (reinterpret_cast <uint8_t *>(footer_key.data ()), footer_key.size ());
@@ -98,6 +123,92 @@ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProper
98123 return properties_builder.build ();
99124}
100125
126+ std::shared_ptr<ExternalFileEncryptionProperties>
127+ CryptoFactory::GetExternalFileEncryptionProperties (
128+ const KmsConnectionConfig& kms_connection_config,
129+ const ExternalEncryptionConfiguration& external_encryption_config,
130+ const std::string& file_path, const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
131+ // Validate the same rules as FileEncryptionProperties but considering per_column_encryption too.
132+ // If uniform_encryption is not set then either column_keys or per_column_encryption must have
133+ // values.
134+ // If uniform_encryption is set, then both column_keys and per_column_encryption must be empty.
135+ bool no_columns_encrypted = external_encryption_config.column_keys .empty () &&
136+ external_encryption_config.per_column_encryption .empty ();
137+ if (!external_encryption_config.uniform_encryption && no_columns_encrypted) {
138+ throw ParquetException (
139+ " Either uniform_encryption must be set or column encryption must be specified in either "
140+ " column_keys or per_column_encryption" );
141+ } else if (external_encryption_config.uniform_encryption && !no_columns_encrypted) {
142+ throw ParquetException (" Cannot set both column encryption and uniform_encryption" );
143+ }
144+
145+ auto [key_material_store, key_wrapper] = GetFileKeyUtils (
146+ key_toolkit_, kms_connection_config, external_encryption_config, file_path, file_system);
147+
148+ int dek_length = ValidateAndGetKeyLength (external_encryption_config.data_key_length_bits );
149+
150+ std::string footer_key (dek_length, ' \0 ' );
151+ RandBytes (reinterpret_cast <uint8_t *>(footer_key.data ()), footer_key.size ());
152+
153+ std::string footer_key_metadata =
154+ key_wrapper.GetEncryptionKeyMetadata (footer_key, external_encryption_config.footer_key , true );
155+
156+ ExternalFileEncryptionProperties::Builder external_properties_builder =
157+ ExternalFileEncryptionProperties::Builder (external_encryption_config.footer_key );
158+ external_properties_builder.footer_key_metadata (footer_key_metadata);
159+ external_properties_builder.algorithm (external_encryption_config.encryption_algorithm );
160+
161+ if (!external_encryption_config.uniform_encryption &&
162+ external_encryption_config.plaintext_footer ) {
163+ external_properties_builder.set_plaintext_footer ();
164+ }
165+
166+ ColumnPathToEncryptionPropertiesMap encrypted_columns;
167+ if (!external_encryption_config.column_keys .empty ()) {
168+ encrypted_columns = GetColumnEncryptionProperties (
169+ dek_length, external_encryption_config.column_keys , &key_wrapper);
170+ }
171+ if (!external_encryption_config.per_column_encryption .empty ()) {
172+ for (const auto & pair : external_encryption_config.per_column_encryption ) {
173+ const std::string& column_name = pair.first ;
174+ const ColumnEncryptionAttributes& attributes = pair.second ;
175+
176+ // Validate column names are not in both column_keys and per_column_encryption maps.
177+ if (encrypted_columns.find (column_name) != encrypted_columns.end ()) {
178+ std::stringstream string_stream;
179+ string_stream << " Multiple keys defined for column [" << column_name << " ]. " ;
180+ string_stream << " Keys found in column_keys and in per_column_encryption." ;
181+ throw ParquetException (string_stream.str ());
182+ }
183+
184+ // TODO(sbrenes): Check whether the attributes.parquet_cipher == EXTERNAL.
185+ // If so, do not use KMS to resolve the column_key, just forward it.
186+ std::string column_key (dek_length, ' \0 ' );
187+ RandBytes (reinterpret_cast <uint8_t *>(column_key.data ()), column_key.size ());
188+ std::string column_key_metadata =
189+ key_wrapper.GetEncryptionKeyMetadata (column_key, attributes.key_id , false );
190+
191+ std::shared_ptr<ColumnEncryptionProperties> column_properties =
192+ ColumnEncryptionProperties::Builder (column_name)
193+ .key (column_key)
194+ ->key_metadata (column_key_metadata)
195+ ->parquet_cipher (attributes.parquet_cipher )
196+ ->build ();
197+
198+ encrypted_columns.insert ({column_name, column_properties});
199+ }
200+ }
201+ if (!encrypted_columns.empty ()) {
202+ external_properties_builder.encrypted_columns (encrypted_columns);
203+ }
204+
205+ if (key_material_store != nullptr ) {
206+ key_material_store->SaveMaterial ();
207+ }
208+
209+ return external_properties_builder.build_external ();
210+ }
211+
101212ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties (
102213 int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) {
103214 ColumnPathToEncryptionPropertiesMap encrypted_columns;
0 commit comments