Skip to content

Commit cdd9f0a

Browse files
committed
add tests
1 parent 19c0f20 commit cdd9f0a

File tree

5 files changed

+90
-1
lines changed

5 files changed

+90
-1
lines changed

parquet/src/column/reader.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,25 @@ mod tests {
601601
use crate::util::test_common::page_util::InMemoryPageReader;
602602
use crate::util::test_common::rand_gen::make_pages;
603603

604+
// Checks that `parse_v1_level` returns an error (rather than a value) when the
// RLE-encoded input is shorter than it claims to be. Both cases expect the
// same message: "Parquet error: not enough data to read levels".
#[test]
605+
fn test_parse_v1_level_invalid_length() {
606+
// Case 1: the buffer holds exactly four bytes, [10, 0, 0, 0]. Per the
// original note these declare a length of 10, yet no further bytes follow.
// NOTE(review): presumably a little-endian length prefix; confirm in parse_v1_level.
607+
let buf = Bytes::from(vec![10, 0, 0, 0]);
608+
let err = parse_v1_level(1, 100, Encoding::RLE, buf).unwrap_err();
609+
assert_eq!(
610+
err.to_string(),
611+
"Parquet error: not enough data to read levels"
612+
);
613+
614+
// Case 2: the buffer holds only three bytes, [4, 0, 0]. Per the original
// note the declared length is 4, which is more than the buffer contains.
615+
let buf = Bytes::from(vec![4, 0, 0]);
616+
let err = parse_v1_level(1, 100, Encoding::RLE, buf).unwrap_err();
617+
assert_eq!(
618+
err.to_string(),
619+
"Parquet error: not enough data to read levels"
620+
);
621+
}
622+
604623
// Sizing constants declared inside this `mod tests`; the code that consumes
// them lies outside this diff hunk.
// NOTE(review): presumably the inputs for the generated test pages; confirm against their users.
const NUM_LEVELS: usize = 128;
605624
const NUM_PAGES: usize = 2;
606625
const MAX_DEF_LEVEL: i16 = 5;

parquet/src/encodings/decoding.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,16 @@ mod tests {
13901390
test_plain_skip::<FixedLenByteArrayType>(Bytes::from(data_bytes), 3, 6, 4, &[]);
13911391
}
13921392

1393+
// Checks that `DictDecoder::<Int32Type>::set_data` returns an error when it is
// handed an empty `Bytes` buffer, and that the error text is
// "Parquet error: Not enough bytes to decode bit_width".
#[test]
1394+
fn test_dict_decoder_empty_data() {
1395+
let mut decoder = DictDecoder::<Int32Type>::new();
1396+
// `Bytes::new()` is a zero-length buffer.
// NOTE(review): the second argument (10) is presumably the value count; confirm.
let err = decoder.set_data(Bytes::new(), 10).unwrap_err();
1397+
assert_eq!(
1398+
err.to_string(),
1399+
"Parquet error: Not enough bytes to decode bit_width"
1400+
);
1401+
}
1402+
13931403
fn test_plain_decode<T: DataType>(
13941404
data: Bytes,
13951405
num_values: usize,

parquet/src/file/reader.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,34 @@ impl Iterator for FilePageIterator {
288288
}
289289

290290
// Empty impl: `FilePageIterator` implements `PageIterator` without defining
// any items of its own in this block.
impl PageIterator for FilePageIterator {}
291+
292+
// Unit tests for out-of-bounds reads through `get_read` / `get_bytes` when the
// underlying storage is an in-memory `Bytes` buffer. Every case uses the same
// 4-byte buffer and asserts the exact error message.
// NOTE(review): the test names suggest these methods come from a chunk-reader
// trait implemented for `Bytes`; confirm against the code above this module.
#[cfg(test)]
293+
mod tests {
294+
use super::*;
295+
296+
// `get_read` with a start offset (5) beyond the buffer length (4) must fail.
#[test]
297+
fn test_bytes_chunk_reader_get_read_out_of_bounds() {
298+
let data = Bytes::from(vec![0, 1, 2, 3]);
299+
let err = data.get_read(5).unwrap_err();
300+
assert_eq!(
301+
err.to_string(),
302+
"Parquet error: Expected to read at offset 5, while file has length 4"
303+
);
304+
}
305+
306+
// `get_bytes` must fail both when the start offset is past the end and when
// the start is in range but start + length runs past the end.
#[test]
307+
fn test_bytes_chunk_reader_get_bytes_out_of_bounds() {
308+
let data = Bytes::from(vec![0, 1, 2, 3]);
309+
// Start offset 5 is already beyond the 4-byte buffer.
let err = data.get_bytes(5, 1).unwrap_err();
310+
assert_eq!(
311+
err.to_string(),
312+
"Parquet error: Expected to read 1 bytes at offset 5, while file has length 4"
313+
);
314+
315+
// Start offset 2 is valid, but 2 + 3 = 5 exceeds the buffer length of 4.
let err = data.get_bytes(2, 3).unwrap_err();
316+
assert_eq!(
317+
err.to_string(),
318+
"Parquet error: Expected to read 3 bytes at offset 2, while file has length 4"
319+
);
320+
}
321+
}

parquet/src/file/serialized_reader.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,35 @@ mod tests {
11421142

11431143
use super::*;
11441144

1145+
// Checks that `decode_page` rejects a DATA_PAGE_V2 header whose
// `definition_levels_byte_length` (11) is larger than the declared
// `uncompressed_page_size` (10), returning "Parquet error: Invalid page header".
#[test]
1146+
fn test_decode_page_invalid_offset() {
1147+
use crate::file::metadata::thrift_gen::DataPageHeaderV2;
1148+
1149+
// Start from a default header and set only the fields this test needs.
let mut page_header = PageHeader::default();
1150+
page_header.r#type = PageType::DATA_PAGE_V2;
1151+
page_header.uncompressed_page_size = 10;
1152+
page_header.compressed_page_size = 10;
1153+
let mut data_page_header_v2 = DataPageHeaderV2::default();
1154+
data_page_header_v2.definition_levels_byte_length = 11; // offset > uncompressed_page_size
1155+
page_header.data_page_header_v2 = Some(data_page_header_v2);
1156+
1157+
// The page buffer passed in is empty.
let buffer = Bytes::new();
1158+
let err = decode_page(page_header, buffer, Type::INT32, None).unwrap_err();
1159+
assert_eq!(err.to_string(), "Parquet error: Invalid page header");
1160+
}
1161+
1162+
// Checks that `decode_page` returns an error for a header whose page type is
// `PageType::INDEX_PAGE`, with the message
// "Parquet error: Page type INDEX_PAGE is not supported".
#[test]
1163+
fn test_decode_unsupported_page() {
1164+
// All header fields other than the page type keep their default values.
let mut page_header = PageHeader::default();
1165+
page_header.r#type = PageType::INDEX_PAGE;
1166+
let buffer = Bytes::new();
1167+
let err = decode_page(page_header, buffer, Type::INT32, None).unwrap_err();
1168+
assert_eq!(
1169+
err.to_string(),
1170+
"Parquet error: Page type INDEX_PAGE is not supported"
1171+
);
1172+
}
1173+
11451174
#[test]
11461175
fn test_cursor_and_file_has_the_same_behaviour() {
11471176
let mut buf: Vec<u8> = Vec::new();

parquet/tests/arrow_reader/bad_data.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ fn test_arrow_gh_41321() {
8888
let err = read_file("ARROW-GH-41321.parquet").unwrap_err();
8989
assert_eq!(
9090
err.to_string(),
91-
"External: Parquet argument error: Parquet error: Invalid or corrupted Bit width 254. Max allowed is 32"
91+
"External: Parquet argument error: Parquet error: Invalid or corrupted RLE bit width 254. Max allowed is 32"
9292
);
9393
}
9494

0 commit comments

Comments
 (0)