File tree Expand file tree Collapse file tree 7 files changed +41
-8
lines changed Expand file tree Collapse file tree 7 files changed +41
-8
lines changed Original file line number Diff line number Diff line change @@ -569,11 +569,15 @@ fn parse_v1_level(
569569 match encoding {
570570 Encoding :: RLE => {
571571 let i32_size = std:: mem:: size_of :: < i32 > ( ) ;
572- let data_size = read_num_bytes :: < i32 > ( i32_size, buf. as_ref ( ) ) as usize ;
573- Ok ( (
574- i32_size + data_size,
575- buf. slice ( i32_size..i32_size + data_size) ,
576- ) )
572+ if i32_size <= buf. len ( ) {
573+ let data_size = read_num_bytes :: < i32 > ( i32_size, buf. as_ref ( ) ) as usize ;
574+ let end =
575+ i32_size. checked_add ( data_size) . ok_or ( general_err ! ( "invalid level length" ) ) ?;
576+ if end <= buf. len ( ) {
577+ return Ok ( ( end, buf. slice ( i32_size..end) ) ) ;
578+ }
579+ }
580+ Err ( general_err ! ( "not enough data to read levels" ) )
577581 }
578582 #[ allow( deprecated) ]
579583 Encoding :: BIT_PACKED => {
Original file line number Diff line number Diff line change @@ -382,6 +382,12 @@ impl<T: DataType> Decoder<T> for DictDecoder<T> {
382382 fn set_data ( & mut self , data : Bytes , num_values : usize ) -> Result < ( ) > {
383383 // First byte in `data` is bit width
384384 let bit_width = data. as_ref ( ) [ 0 ] ;
385+ if bit_width > 32 {
386+ return Err ( general_err ! (
387+ "Invalid or corrupted Bit width {}. Max allowed is 32" ,
388+ bit_width
389+ ) ) ;
390+ }
385391 let mut rle_decoder = RleDecoder :: new ( bit_width) ;
386392 rle_decoder. set_data ( data. slice ( 1 ..) ) ;
387393 self . num_values = num_values;
Original file line number Diff line number Diff line change @@ -513,7 +513,10 @@ impl RleDecoder {
513513 self . rle_left = ( indicator_value >> 1 ) as u32 ;
514514 let value_width = bit_util:: ceil ( self . bit_width as usize , 8 ) ;
515515 self . current_value = bit_reader. get_aligned :: < u64 > ( value_width) ;
516- assert ! ( self . current_value. is_some( ) ) ;
516+ assert ! (
517+ self . current_value. is_some( ) ,
518+ "parquet_data_error: not enough data for RLE decoding"
519+ ) ;
517520 }
518521 true
519522 } else {
Original file line number Diff line number Diff line change @@ -124,11 +124,26 @@ impl ChunkReader for Bytes {
124124
125125 fn get_read ( & self , start : u64 ) -> Result < Self :: T > {
126126 let start = start as usize ;
127+ if start > self . len ( ) {
128+ return Err ( eof_err ! (
129+ "Expected to read at offset {}, while file has length {}" ,
130+ start,
131+ self . len( )
132+ ) ) ;
133+ }
127134 Ok ( self . slice ( start..) . reader ( ) )
128135 }
129136
130137 fn get_bytes ( & self , start : u64 , length : usize ) -> Result < Bytes > {
131138 let start = start as usize ;
139+ if start > self . len ( ) || start + length > self . len ( ) {
140+ return Err ( eof_err ! (
141+ "Expected to read {} bytes at offset {}, while file has length {}" ,
142+ length,
143+ start,
144+ self . len( )
145+ ) ) ;
146+ }
132147 Ok ( self . slice ( start..start + length) )
133148 }
134149}
Original file line number Diff line number Diff line change @@ -392,6 +392,9 @@ pub(crate) fn decode_page(
392392 let buffer = match decompressor {
393393 Some ( decompressor) if can_decompress => {
394394 let uncompressed_page_size = usize:: try_from ( page_header. uncompressed_page_size ) ?;
395+ if offset > buffer. len ( ) || offset > uncompressed_page_size {
396+ return Err ( general_err ! ( "Invalid page header" ) ) ;
397+ }
395398 let decompressed_size = uncompressed_page_size - offset;
396399 let mut decompressed = Vec :: with_capacity ( uncompressed_page_size) ;
397400 decompressed. extend_from_slice ( & buffer. as_ref ( ) [ ..offset] ) ;
@@ -458,7 +461,7 @@ pub(crate) fn decode_page(
458461 }
459462 _ => {
460463 // For unknown page type (e.g., INDEX_PAGE), skip and read next.
461- unimplemented ! ( "Page type {:?} is not supported" , page_header. r#type)
464+ return Err ( general_err ! ( "Page type {:?} is not supported" , page_header. r#type) ) ;
462465 }
463466 } ;
464467
Original file line number Diff line number Diff line change @@ -1359,6 +1359,8 @@ fn schema_from_array_helper<'a>(
13591359 if !is_root_node {
13601360 builder = builder. with_repetition ( rep) ;
13611361 }
1362+ } else if !is_root_node {
1363+ return Err ( general_err ! ( "Repetition level must be defined for non-root types" ) ) ;
13621364 }
13631365 Ok ( ( next_index, Arc :: new ( builder. build ( ) . unwrap ( ) ) ) )
13641366 }
Original file line number Diff line number Diff line change @@ -84,7 +84,7 @@ fn test_parquet_1481() {
8484}
8585
8686#[ test]
87- #[ should_panic( expected = "assertion failed: self.current_value.is_some() " ) ]
87+ #[ should_panic( expected = "parquet_data_error: not enough data for RLE decoding " ) ]
8888fn test_arrow_gh_41321 ( ) {
8989 let err = read_file ( "ARROW-GH-41321.parquet" ) . unwrap_err ( ) ;
9090 assert_eq ! ( err. to_string( ) , "TBD (currently panics)" ) ;
You can’t perform that action at this time.
0 commit comments