Skip to content

Commit 67eda38

Browse files
Add GetExternalFileEncryptionProperties method to C++ CryptoFactory
Add GetExternalFileEncryptionProperties method to C++ CryptoFactory
2 parents 59a3c44 + ddd4f3b commit 67eda38

File tree

6 files changed

+490
-23
lines changed

6 files changed

+490
-23
lines changed

cpp/src/parquet/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,9 @@ if(PARQUET_REQUIRE_ENCRYPTION)
413413
encryption/write_configurations_test.cc
414414
encryption/read_configurations_test.cc
415415
encryption/properties_test.cc
416-
encryption/test_encryption_util.cc)
416+
encryption/crypto_factory_test.cc
417+
encryption/test_encryption_util.cc
418+
encryption/test_in_memory_kms.cc)
417419
add_parquet_test(encryption-key-management-test
418420
SOURCES
419421
encryption/key_management_test.cc

cpp/src/parquet/encryption/crypto_factory.cc

Lines changed: 130 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,21 @@
2828

2929
namespace parquet::encryption {
3030

31-
void CryptoFactory::RegisterKmsClientFactory(
32-
std::shared_ptr<KmsClientFactory> kms_client_factory) {
33-
key_toolkit_->RegisterKmsClientFactory(std::move(kms_client_factory));
34-
}
31+
/// Helpers shared by GetFileEncryptionProperties and
32+
/// GetExternalFileEncryptionProperties, factored out here for reuse.
33+
namespace {
3534

36-
std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties(
37-
const KmsConnectionConfig& kms_connection_config,
38-
const EncryptionConfiguration& encryption_config, const std::string& file_path,
39-
const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
40-
if (!encryption_config.uniform_encryption && encryption_config.column_keys.empty()) {
41-
throw ParquetException("Either column_keys or uniform_encryption must be set");
42-
} else if (encryption_config.uniform_encryption &&
43-
!encryption_config.column_keys.empty()) {
44-
throw ParquetException("Cannot set both column_keys and uniform_encryption");
45-
}
46-
const std::string& footer_key_id = encryption_config.footer_key;
47-
const std::string& column_key_str = encryption_config.column_keys;
35+
// Bundles the two objects built by GetFileKeyUtils so they can be returned together
// and unpacked by the caller with a structured binding.
// Field order matters: GetFileKeyUtils returns this struct via brace initialization.
36+
struct FileKeyUtils {
37+
// Key material store handed to the key wrapper. It is initialized to nullptr in
// GetFileKeyUtils and stays nullptr when encryption_config.internal_key_material is
// set; callers check for nullptr before calling SaveMaterial().
std::shared_ptr<FileKeyMaterialStore> key_material_store;
38+
// Wrapper constructed from the key toolkit, the KMS connection config, the store
// above, the cache lifetime and the double-wrapping flag; used to produce the key
// metadata for footer and column keys.
FileKeyWrapper key_wrapper;
39+
};
4840

41+
FileKeyUtils GetFileKeyUtils(
42+
const std::shared_ptr<KeyToolkit>& key_toolkit,
43+
const KmsConnectionConfig& kms_connection_config,
44+
const EncryptionConfiguration& encryption_config,
45+
const std::string& file_path, const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
4946
std::shared_ptr<FileKeyMaterialStore> key_material_store = nullptr;
5047
if (!encryption_config.internal_key_material) {
5148
try {
@@ -58,18 +55,46 @@ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProper
5855
}
5956
}
6057

61-
FileKeyWrapper key_wrapper(key_toolkit_.get(), kms_connection_config,
58+
FileKeyWrapper key_wrapper(key_toolkit.get(), kms_connection_config,
6259
key_material_store, encryption_config.cache_lifetime_seconds,
6360
encryption_config.double_wrapping);
6461

65-
int32_t dek_length_bits = encryption_config.data_key_length_bits;
62+
return {key_material_store, std::move(key_wrapper)};
63+
}
64+
65+
int ValidateAndGetKeyLength(int32_t dek_length_bits) {
6666
if (!internal::ValidateKeyLength(dek_length_bits)) {
6767
std::ostringstream ss;
6868
ss << "Wrong data key length : " << dek_length_bits;
6969
throw ParquetException(ss.str());
7070
}
71+
return dek_length_bits / 8;
72+
}
7173

72-
int dek_length = dek_length_bits / 8;
74+
} // Anonymous namespace
75+
76+
/// Forwards the given KMS client factory to the key toolkit held by this
/// CryptoFactory. The shared_ptr is taken by value and moved into the toolkit call,
/// so the caller's copy is not modified.
void CryptoFactory::RegisterKmsClientFactory(
77+
std::shared_ptr<KmsClientFactory> kms_client_factory) {
78+
key_toolkit_->RegisterKmsClientFactory(std::move(kms_client_factory));
79+
}
80+
81+
std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties(
82+
const KmsConnectionConfig& kms_connection_config,
83+
const EncryptionConfiguration& encryption_config, const std::string& file_path,
84+
const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
85+
if (!encryption_config.uniform_encryption && encryption_config.column_keys.empty()) {
86+
throw ParquetException("Either column_keys or uniform_encryption must be set");
87+
} else if (encryption_config.uniform_encryption &&
88+
!encryption_config.column_keys.empty()) {
89+
throw ParquetException("Cannot set both column_keys and uniform_encryption");
90+
}
91+
const std::string& footer_key_id = encryption_config.footer_key;
92+
const std::string& column_key_str = encryption_config.column_keys;
93+
94+
auto [key_material_store, key_wrapper] = GetFileKeyUtils(
95+
key_toolkit_, kms_connection_config, encryption_config, file_path, file_system);
96+
97+
int dek_length = ValidateAndGetKeyLength(encryption_config.data_key_length_bits);
7398

7499
std::string footer_key(dek_length, '\0');
75100
RandBytes(reinterpret_cast<uint8_t*>(footer_key.data()), footer_key.size());
@@ -98,6 +123,92 @@ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProper
98123
return properties_builder.build();
99124
}
100125

126+
std::shared_ptr<ExternalFileEncryptionProperties>
127+
CryptoFactory::GetExternalFileEncryptionProperties(
128+
const KmsConnectionConfig& kms_connection_config,
129+
const ExternalEncryptionConfiguration& external_encryption_config,
130+
const std::string& file_path, const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
131+
// Validate the same rules as FileEncryptionProperties but considering per_column_encryption too.
132+
// If uniform_encryption is not set then either column_keys or per_column_encryption must have
133+
// values.
134+
// If uniform_encryption is set, then both column_keys and per_column_encryption must be empty.
135+
bool no_columns_encrypted = external_encryption_config.column_keys.empty() &&
136+
external_encryption_config.per_column_encryption.empty();
137+
if (!external_encryption_config.uniform_encryption && no_columns_encrypted) {
138+
throw ParquetException(
139+
"Either uniform_encryption must be set or column encryption must be specified in either "
140+
"column_keys or per_column_encryption");
141+
} else if (external_encryption_config.uniform_encryption && !no_columns_encrypted) {
142+
throw ParquetException("Cannot set both column encryption and uniform_encryption");
143+
}
144+
145+
auto [key_material_store, key_wrapper] = GetFileKeyUtils(
146+
key_toolkit_, kms_connection_config, external_encryption_config, file_path, file_system);
147+
148+
int dek_length = ValidateAndGetKeyLength(external_encryption_config.data_key_length_bits);
149+
150+
std::string footer_key(dek_length, '\0');
151+
RandBytes(reinterpret_cast<uint8_t*>(footer_key.data()), footer_key.size());
152+
153+
std::string footer_key_metadata =
154+
key_wrapper.GetEncryptionKeyMetadata(footer_key, external_encryption_config.footer_key, true);
155+
156+
ExternalFileEncryptionProperties::Builder external_properties_builder =
157+
ExternalFileEncryptionProperties::Builder(external_encryption_config.footer_key);
158+
external_properties_builder.footer_key_metadata(footer_key_metadata);
159+
external_properties_builder.algorithm(external_encryption_config.encryption_algorithm);
160+
161+
if (!external_encryption_config.uniform_encryption &&
162+
external_encryption_config.plaintext_footer) {
163+
external_properties_builder.set_plaintext_footer();
164+
}
165+
166+
ColumnPathToEncryptionPropertiesMap encrypted_columns;
167+
if (!external_encryption_config.column_keys.empty()) {
168+
encrypted_columns = GetColumnEncryptionProperties(
169+
dek_length, external_encryption_config.column_keys, &key_wrapper);
170+
}
171+
if (!external_encryption_config.per_column_encryption.empty()) {
172+
for (const auto& pair : external_encryption_config.per_column_encryption) {
173+
const std::string& column_name = pair.first;
174+
const ColumnEncryptionAttributes& attributes = pair.second;
175+
176+
// Validate column names are not in both column_keys and per_column_encryption maps.
177+
if (encrypted_columns.find(column_name) != encrypted_columns.end()) {
178+
std::stringstream string_stream;
179+
string_stream << "Multiple keys defined for column [" << column_name << "]. ";
180+
string_stream << "Keys found in column_keys and in per_column_encryption.";
181+
throw ParquetException(string_stream.str());
182+
}
183+
184+
// TODO(sbrenes): Check whether the attributes.parquet_cipher == EXTERNAL.
185+
// If so, do not use KMS to resolve the column_key, just forward it.
186+
std::string column_key(dek_length, '\0');
187+
RandBytes(reinterpret_cast<uint8_t*>(column_key.data()), column_key.size());
188+
std::string column_key_metadata =
189+
key_wrapper.GetEncryptionKeyMetadata(column_key, attributes.key_id, false);
190+
191+
std::shared_ptr<ColumnEncryptionProperties> column_properties =
192+
ColumnEncryptionProperties::Builder(column_name)
193+
.key(column_key)
194+
->key_metadata(column_key_metadata)
195+
->parquet_cipher(attributes.parquet_cipher)
196+
->build();
197+
198+
encrypted_columns.insert({column_name, column_properties});
199+
}
200+
}
201+
if (!encrypted_columns.empty()) {
202+
external_properties_builder.encrypted_columns(encrypted_columns);
203+
}
204+
205+
if (key_material_store != nullptr) {
206+
key_material_store->SaveMaterial();
207+
}
208+
209+
return external_properties_builder.build_external();
210+
}
211+
101212
ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties(
102213
int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) {
103214
ColumnPathToEncryptionPropertiesMap encrypted_columns;

cpp/src/parquet/encryption/crypto_factory.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,15 @@ struct PARQUET_EXPORT ExternalEncryptionConfiguration : public EncryptionConfigu
115115
/// algorithm specified in the encryption_algorithm field.
116116
/// If a column name appears in the new per_column_encryption map, it will be encrypted using the
117117
/// per column specific algorithm and key.
118-
/// If a column name appears in both, the per_column_encryption values will take precedence.
118+
/// If a column name appears in both, an exception will be thrown.
119119
std::unordered_map<std::string, ColumnEncryptionAttributes> per_column_encryption;
120120

121121
/// External encryption services may use additional context provided by the application to
122122
/// enforce robust access control. The values sent to the external service depend on each
123123
/// implementation.
124-
/// This values must be a valid JSON-formatted string.
124+
/// This value must be a valid JSON-formatted string.
125+
/// Validation of the string is done by the external encryption service; Arrow only
126+
/// forwards this value.
125127
/// Format: "{\"user_id\": \"abc123\", \"location\": {\"lat\": 9.7489, \"lon\": -83.7534}}"
126128
std::string app_context;
127129

@@ -153,13 +155,21 @@ class PARQUET_EXPORT CryptoFactory {
153155
void RegisterKmsClientFactory(std::shared_ptr<KmsClientFactory> kms_client_factory);
154156

155157
/// Get the encryption properties for a Parquet file.
156-
/// If external key material is used then a file system and path to the
158+
/// If key material from outside the file is used, then a file system and path to the
157159
/// parquet file must be provided.
158160
std::shared_ptr<FileEncryptionProperties> GetFileEncryptionProperties(
159161
const KmsConnectionConfig& kms_connection_config,
160162
const EncryptionConfiguration& encryption_config, const std::string& file_path = "",
161163
const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);
162164

165+
/// Get the external encryption properties for a Parquet file. Used when encryption
166+
/// will be provided by an external service.
/// Throws ParquetException if neither uniform_encryption nor any column encryption
/// (column_keys or per_column_encryption) is configured, if uniform_encryption is set
/// together with column encryption, or if a column name appears in both column_keys
/// and per_column_encryption.
/// NOTE(review): file_path and file_system are presumably required when key material
/// is stored outside the file, as for GetFileEncryptionProperties -- confirm.
167+
std::shared_ptr<ExternalFileEncryptionProperties> GetExternalFileEncryptionProperties(
168+
const KmsConnectionConfig& kms_connection_config,
169+
const ExternalEncryptionConfiguration& external_encryption_config,
170+
const std::string& file_path = "",
171+
const std::shared_ptr<::arrow::fs::FileSystem>& file_system = NULLPTR);
172+
163173
/// Get decryption properties for a Parquet file.
164174
/// If external key material is used then a file system and path to the
165175
/// parquet file must be provided.

0 commit comments

Comments
 (0)