diff --git a/rust/lance-encoding/src/lib.rs b/rust/lance-encoding/src/lib.rs
index 19749e4df7..0a91de8678 100644
--- a/rust/lance-encoding/src/lib.rs
+++ b/rust/lance-encoding/src/lib.rs
@@ -6,7 +6,8 @@ use std::ops::Range;
 
 use bytes::Bytes;
 use futures::{future::BoxFuture, FutureExt, TryFutureExt};
-use lance_core::Result;
+use lance_core::{Error, Result};
+use snafu::location;
 
 pub mod buffer;
 pub mod compression;
@@ -88,8 +89,21 @@ impl BufferScheduler {
         Self { data }
     }
 
-    fn satisfy_request(&self, req: Range<u64>) -> Bytes {
-        self.data.slice(req.start as usize..req.end as usize)
+    fn satisfy_request(&self, req: Range<u64>) -> Result<Bytes> {
+        let start = req.start as usize;
+        let end = req.end as usize;
+        if end > self.data.len() {
+            return Err(Error::io(
+                format!(
+                    "byte range {}..{} out of bounds for buffer of size {}",
+                    start,
+                    end,
+                    self.data.len()
+                ),
+                location!(),
+            ));
+        }
+        Ok(self.data.slice(start..end))
     }
 }
 
@@ -99,10 +113,12 @@ impl EncodingsIo for BufferScheduler {
         ranges: Vec<Range<u64>>,
         _priority: u64,
     ) -> BoxFuture<'static, Result<Vec<Bytes>>> {
-        std::future::ready(Ok(ranges
-            .into_iter()
-            .map(|range| self.satisfy_request(range))
-            .collect::<Vec<_>>()))
+        std::future::ready(
+            ranges
+                .into_iter()
+                .map(|range| self.satisfy_request(range))
+                .collect::<Result<Vec<_>>>(),
+        )
         .boxed()
     }
 }
diff --git a/rust/lance-io/src/scheduler.rs b/rust/lance-io/src/scheduler.rs
index a3591940ce..796d09c473 100644
--- a/rust/lance-io/src/scheduler.rs
+++ b/rust/lance-io/src/scheduler.rs
@@ -814,6 +814,21 @@ fn is_overlapping(range1: &Range<u64>, range2: &Range<u64>) -> bool {
     range1.start < range2.end && range2.start < range1.end
 }
 
+fn checked_slice(data: &Bytes, range: Range<usize>) -> Result<Bytes> {
+    if range.end > data.len() {
+        return Err(Error::io(
+            format!(
+                "byte range {}..{} out of bounds for buffer of size {}",
+                range.start,
+                range.end,
+                data.len()
+            ),
+            location!(),
+        ));
+    }
+    Ok(data.slice(range))
+}
+
 impl FileScheduler {
     /// Submit a batch of I/O requests to the reader
     ///
@@ -893,26 +908,28 @@ impl FileScheduler {
                 if is_overlapping(updated_range, orig_range) {
                     // We need to undo the coalescing and splitting done earlier
                     let start = orig_range.start as usize - byte_offset;
+                    let data = &bytes_vec[updated_index];
                     if orig_range.end <= updated_range.end {
                         // The original range is fully contained in the updated range, can do
                         // zero-copy slice
                         let end = orig_range.end as usize - byte_offset;
-                        final_bytes.push(bytes_vec[updated_index].slice(start..end));
+                        final_bytes.push(checked_slice(data, start..end)?);
                     } else {
                         // The original read was split into multiple requests, need to copy
                         // back into a single buffer
                         let orig_size = orig_range.end - orig_range.start;
                         let mut merged_bytes = Vec::with_capacity(orig_size as usize);
-                        merged_bytes.extend_from_slice(&bytes_vec[updated_index].slice(start..));
+                        merged_bytes.extend_from_slice(&checked_slice(data, start..data.len())?);
                         let mut copy_offset = merged_bytes.len() as u64;
                         while copy_offset < orig_size {
                             updated_index += 1;
                             let next_range = &updated_requests[updated_index];
                             let bytes_to_take =
                                 (orig_size - copy_offset).min(next_range.end - next_range.start);
-                            merged_bytes.extend_from_slice(
-                                &bytes_vec[updated_index].slice(0..bytes_to_take as usize),
-                            );
+                            merged_bytes.extend_from_slice(&checked_slice(
+                                &bytes_vec[updated_index],
+                                0..bytes_to_take as usize,
+                            )?);
                             copy_offset += bytes_to_take;
                         }
                         final_bytes.push(Bytes::from(merged_bytes));