Skip to content

Commit 381c69b

Browse files
committed
tmp
1 parent e2ebc45 commit 381c69b

File tree

3 files changed

+24
-28
lines changed

3 files changed

+24
-28
lines changed

parquet/src/column/writer/mod.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,13 +1359,12 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
13591359
encryption_properties,
13601360
&self.descr,
13611361
))
1362-
.clear_statistics()
1362+
.clear_statistics()
13631363
} else {
1364-
builder
1365-
.set_column_crypto_metadata(get_column_crypto_metadata(
1366-
encryption_properties,
1367-
&self.descr,
1368-
))
1364+
builder.set_column_crypto_metadata(get_column_crypto_metadata(
1365+
encryption_properties,
1366+
&self.descr,
1367+
))
13691368
}
13701369
} else {
13711370
builder

parquet/src/file/metadata/writer.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,6 @@ impl MetadataObjectWriter {
631631
match &self.file_encryptor {
632632
Some(file_encryptor) => {
633633
let unencrypted_row_groups = row_groups.clone();
634-
// TODO: unencrypted_row_groups should not contain statistics for encrypted columns
635634
let encrypted_row_groups = Self::encrypt_row_groups(row_groups, file_encryptor)?;
636635
Ok((encrypted_row_groups, Some(unencrypted_row_groups)))
637636
}

parquet/tests/encryption/encryption.rs

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,7 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
731731

732732
let file_encryption_properties = FileEncryptionProperties::builder(footer_key)
733733
.with_plaintext_footer(true)
734+
.with_column_key("x", column_key.clone())
734735
.build()
735736
.unwrap();
736737

@@ -740,13 +741,14 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
740741

741742
// Write encrypted data with plaintext footer
742743
let values = Int32Array::from(vec![8, 3, 4, 19, 5]);
743-
let schema = Arc::new(Schema::new(vec![Field::new(
744-
"x",
745-
values.data_type().clone(),
746-
true,
747-
)]));
744+
let schema = Arc::new(Schema::new(vec![
745+
Field::new("x", values.data_type().clone(), true),
746+
Field::new("y", values.data_type().clone(), true),
747+
]));
748+
748749
let values = Arc::new(values);
749-
let record_batches = vec![RecordBatch::try_new(schema.clone(), vec![values]).unwrap()];
750+
let record_batches =
751+
vec![RecordBatch::try_new(schema.clone(), vec![values.clone(), values]).unwrap()];
750752

751753
let temp_file = tempfile::tempfile().unwrap();
752754
let mut writer = ArrowWriter::try_new(&temp_file, schema, Some(props)).unwrap();
@@ -755,7 +757,7 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
755757
}
756758
let _file_metadata = writer.close().unwrap();
757759

758-
// Check column statistics can be read by decrypting
760+
// Check that column statistics can be read from the plaintext footer when decryption properties are provided
759761
let options =
760762
ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
761763
let reader_metadata = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
@@ -764,7 +766,10 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
764766
assert_eq!(metadata.num_row_groups(), 1);
765767

766768
let row_group = &metadata.row_groups()[0];
767-
assert_eq!(row_group.columns().len(), 1);
769+
assert_eq!(row_group.columns().len(), 2);
770+
771+
// Statistics should be available from decrypted data
772+
assert!(&row_group.columns()[0].statistics().is_some());
768773
let column_stats = &row_group.columns()[0].statistics().unwrap();
769774
assert_eq!(
770775
column_stats.min_bytes_opt(),
@@ -775,18 +780,19 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
775780
Some(19i32.to_le_bytes().as_slice())
776781
);
777782

778-
// TODO: statistics shouldn't be available without decryption when footer is plaintext
779-
//
780-
// Check column statistics are not available in plaintext footer
783+
// Check that statistics of encrypted columns cannot be read from the plaintext footer when no decryption properties are provided
781784
let options = ArrowReaderOptions::default();
782785
let reader_metadata = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
783786
let metadata = reader_metadata.metadata();
784787

785788
assert_eq!(metadata.num_row_groups(), 1);
786789

787790
let row_group = &metadata.row_groups()[0];
788-
assert_eq!(row_group.columns().len(), 1);
789-
let column_stats = &row_group.columns()[0].statistics().unwrap();
791+
assert_eq!(row_group.columns().len(), 2);
792+
assert!(&row_group.columns()[0].statistics().is_none());
793+
assert!(&row_group.columns()[1].statistics().is_some());
794+
795+
let column_stats = &row_group.columns()[1].statistics().unwrap();
790796
assert_eq!(
791797
column_stats.min_bytes_opt(),
792798
Some(3i32.to_le_bytes().as_slice())
@@ -795,14 +801,6 @@ pub fn test_row_group_statistics_plaintext_encrypted_write() {
795801
column_stats.max_bytes_opt(),
796802
Some(19i32.to_le_bytes().as_slice())
797803
);
798-
799-
let builder =
800-
ParquetRecordBatchReaderBuilder::try_new_with_options(temp_file, options).unwrap();
801-
let mut record_reader = builder.build().unwrap();
802-
assert_eq!(
803-
record_reader.next().unwrap().unwrap_err().to_string(),
804-
"Parquet argument error: Parquet error: Required field type_ is missing"
805-
);
806804
}
807805

808806
#[test]

0 commit comments

Comments
 (0)