@@ -22,10 +22,12 @@ use std::cmp::Ordering;
2222use std:: collections:: HashMap ;
2323use std:: fmt:: { Display , Formatter } ;
2424use std:: hash:: Hash ;
25+ use std:: io:: Read as _;
2526use std:: sync:: Arc ;
2627
2728use _serde:: TableMetadataEnum ;
2829use chrono:: { DateTime , Utc } ;
30+ use flate2:: read:: GzDecoder ;
2931use serde:: { Deserialize , Serialize } ;
3032use serde_repr:: { Deserialize_repr , Serialize_repr } ;
3133use uuid:: Uuid ;
@@ -413,9 +415,24 @@ impl TableMetadata {
413415 file_io : & FileIO ,
414416 metadata_location : impl AsRef < str > ,
415417 ) -> Result < TableMetadata > {
416- let input_file = file_io. new_input ( metadata_location) ?;
418+ let input_file = file_io. new_input ( metadata_location. as_ref ( ) ) ?;
417419 let metadata_content = input_file. read ( ) . await ?;
418- let metadata = serde_json:: from_slice :: < TableMetadata > ( & metadata_content) ?;
420+
421+ // Check if the file is compressed by looking for the gzip "magic number".
422+ let metadata = if metadata_content. len ( ) > 2
423+ && metadata_content[ 0 ] == 0x1F
424+ && metadata_content[ 1 ] == 0x8B
425+ {
426+ let mut decoder = GzDecoder :: new ( metadata_content. as_ref ( ) ) ;
427+ let mut decompressed_data = Vec :: new ( ) ;
428+ decoder
429+ . read_to_end ( & mut decompressed_data)
430+ . map_err ( |e| Error :: new ( ErrorKind :: DataInvalid , e. to_string ( ) ) ) ?;
431+ serde_json:: from_slice ( & decompressed_data) ?
432+ } else {
433+ serde_json:: from_slice ( & metadata_content) ?
434+ } ;
435+
419436 Ok ( metadata)
420437 }
421438
@@ -1314,6 +1331,7 @@ impl SnapshotLog {
13141331mod tests {
13151332 use std:: collections:: HashMap ;
13161333 use std:: fs;
1334+ use std:: io:: Write as _;
13171335 use std:: sync:: Arc ;
13181336
13191337 use anyhow:: Result ;
@@ -3047,6 +3065,30 @@ mod tests {
30473065 assert_eq ! ( read_metadata, original_metadata) ;
30483066 }
30493067
3068+ #[ tokio:: test]
3069+ async fn test_table_metadata_read_compressed ( ) {
3070+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
3071+ let metadata_location = temp_dir. path ( ) . join ( "v1.gz.metadata.json" ) ;
3072+
3073+ let original_metadata: TableMetadata = get_test_table_metadata ( "TableMetadataV2Valid.json" ) ;
3074+ let json = serde_json:: to_string ( & original_metadata) . unwrap ( ) ;
3075+
3076+ let mut encoder = flate2:: write:: GzEncoder :: new ( Vec :: new ( ) , flate2:: Compression :: default ( ) ) ;
3077+ encoder. write_all ( json. as_bytes ( ) ) . unwrap ( ) ;
3078+ std:: fs:: write ( & metadata_location, encoder. finish ( ) . unwrap ( ) )
3079+ . expect ( "failed to write metadata" ) ;
3080+
3081+ // Read the metadata back
3082+ let file_io = FileIOBuilder :: new_fs_io ( ) . build ( ) . unwrap ( ) ;
3083+ let metadata_location = metadata_location. to_str ( ) . unwrap ( ) ;
3084+ let read_metadata = TableMetadata :: read_from ( & file_io, metadata_location)
3085+ . await
3086+ . unwrap ( ) ;
3087+
3088+ // Verify the metadata matches
3089+ assert_eq ! ( read_metadata, original_metadata) ;
3090+ }
3091+
30503092 #[ tokio:: test]
30513093 async fn test_table_metadata_read_nonexistent_file ( ) {
30523094 // Create a FileIO instance
0 commit comments