Skip to content

Commit 17d5287

Browse files
committed
Assorted panics we've found
1 parent 891d31d commit 17d5287

File tree

7 files changed

+41
-8
lines changed

7 files changed

+41
-8
lines changed

parquet/src/column/reader.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -569,11 +569,15 @@ fn parse_v1_level(
569569
match encoding {
570570
Encoding::RLE => {
571571
let i32_size = std::mem::size_of::<i32>();
572-
let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
573-
Ok((
574-
i32_size + data_size,
575-
buf.slice(i32_size..i32_size + data_size),
576-
))
572+
if i32_size <= buf.len() {
573+
let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
574+
let end =
575+
i32_size.checked_add(data_size).ok_or(general_err!("invalid level length"))?;
576+
if end <= buf.len() {
577+
return Ok((end, buf.slice(i32_size..end)));
578+
}
579+
}
580+
Err(general_err!("not enough data to read levels"))
577581
}
578582
#[allow(deprecated)]
579583
Encoding::BIT_PACKED => {

parquet/src/encodings/decoding.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,12 @@ impl<T: DataType> Decoder<T> for DictDecoder<T> {
382382
fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> {
383383
// First byte in `data` is bit width
384384
let bit_width = data.as_ref()[0];
385+
if bit_width > 32 {
386+
return Err(general_err!(
387+
"Invalid or corrupted Bit width {}. Max allowed is 32",
388+
bit_width
389+
));
390+
}
385391
let mut rle_decoder = RleDecoder::new(bit_width);
386392
rle_decoder.set_data(data.slice(1..));
387393
self.num_values = num_values;

parquet/src/encodings/rle.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,10 @@ impl RleDecoder {
513513
self.rle_left = (indicator_value >> 1) as u32;
514514
let value_width = bit_util::ceil(self.bit_width as usize, 8);
515515
self.current_value = bit_reader.get_aligned::<u64>(value_width);
516-
assert!(self.current_value.is_some());
516+
assert!(
517+
self.current_value.is_some(),
518+
"parquet_data_error: not enough data for RLE decoding"
519+
);
517520
}
518521
true
519522
} else {

parquet/src/file/reader.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,26 @@ impl ChunkReader for Bytes {
124124

125125
fn get_read(&self, start: u64) -> Result<Self::T> {
126126
let start = start as usize;
127+
if start > self.len() {
128+
return Err(eof_err!(
129+
"Expected to read at offset {}, while file has length {}",
130+
start,
131+
self.len()
132+
));
133+
}
127134
Ok(self.slice(start..).reader())
128135
}
129136

130137
fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes> {
131138
let start = start as usize;
139+
if start > self.len() || start + length > self.len() {
140+
return Err(eof_err!(
141+
"Expected to read {} bytes at offset {}, while file has length {}",
142+
length,
143+
start,
144+
self.len()
145+
));
146+
}
132147
Ok(self.slice(start..start + length))
133148
}
134149
}

parquet/src/file/serialized_reader.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,9 @@ pub(crate) fn decode_page(
392392
let buffer = match decompressor {
393393
Some(decompressor) if can_decompress => {
394394
let uncompressed_page_size = usize::try_from(page_header.uncompressed_page_size)?;
395+
if offset > buffer.len() || offset > uncompressed_page_size {
396+
return Err(general_err!("Invalid page header"));
397+
}
395398
let decompressed_size = uncompressed_page_size - offset;
396399
let mut decompressed = Vec::with_capacity(uncompressed_page_size);
397400
decompressed.extend_from_slice(&buffer.as_ref()[..offset]);
@@ -458,7 +461,7 @@ pub(crate) fn decode_page(
458461
}
459462
_ => {
460463
// For unknown page type (e.g., INDEX_PAGE), skip and read next.
461-
unimplemented!("Page type {:?} is not supported", page_header.r#type)
464+
return Err(general_err!("Page type {:?} is not supported", page_header.r#type));
462465
}
463466
};
464467

parquet/src/schema/types.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,8 @@ fn schema_from_array_helper<'a>(
13591359
if !is_root_node {
13601360
builder = builder.with_repetition(rep);
13611361
}
1362+
} else if !is_root_node {
1363+
return Err(general_err!("Repetition level must be defined for non-root types"));
13621364
}
13631365
Ok((next_index, Arc::new(builder.build().unwrap())))
13641366
}

parquet/tests/arrow_reader/bad_data.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ fn test_parquet_1481() {
8484
}
8585

8686
#[test]
87-
#[should_panic(expected = "assertion failed: self.current_value.is_some()")]
87+
#[should_panic(expected = "parquet_data_error: not enough data for RLE decoding")]
8888
fn test_arrow_gh_41321() {
8989
let err = read_file("ARROW-GH-41321.parquet").unwrap_err();
9090
assert_eq!(err.to_string(), "TBD (currently panics)");

0 commit comments

Comments (0)