1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use std:: { collections:: HashSet , io:: Cursor , sync:: Arc } ;
18+ use std:: {
19+ collections:: HashSet ,
20+ io:: { Cursor , Read } ,
21+ sync:: Arc ,
22+ } ;
1923
2024use arrow_array:: {
2125 builder:: PrimitiveBuilder ,
@@ -25,17 +29,15 @@ use arrow_array::{
2529} ;
2630use arrow_ipc:: { reader:: FileReader , writer:: FileWriter } ;
2731use arrow_schema:: { DataType , Field , Schema } ;
32+ use byteorder:: { LittleEndian , ReadBytesExt } ;
2833use datafusion_common:: { arrow:: compute:: concat_batches, Column , DataFusionError , ScalarValue } ;
2934use datafusion_pruning:: PruningStatistics ;
30- use las:: { Header , Point } ;
35+ use las:: Header ;
3136use object_store:: { path:: Path , ObjectMeta , ObjectStore , PutPayload } ;
32-
37+ use rayon :: iter :: { IntoParallelRefIterator , ParallelIterator } ;
3338use sedona_geometry:: bounding_box:: BoundingBox ;
3439
35- use crate :: las:: {
36- metadata:: ChunkMeta ,
37- reader:: { read_point, record_decompressor} ,
38- } ;
40+ use crate :: las:: { metadata:: ChunkMeta , reader:: record_decompressor} ;
3941
4042/// Spatial statistics (extent) of LAS/LAZ chunks for pruning.
4143///
@@ -208,6 +210,7 @@ pub async fn chunk_statistics(
208210 chunk_table : & [ ChunkMeta ] ,
209211 header : & Header ,
210212 persist : bool ,
213+ parallel : bool ,
211214) -> Result < LasStatistics , DataFusionError > {
212215 let stats_path = Path :: parse ( format ! ( "{}.stats" , object_meta. location. as_ref( ) ) ) ?;
213216
@@ -234,9 +237,27 @@ pub async fn chunk_statistics(
234237 // extract statistics
235238 let mut builder = LasStatisticsBuilder :: new_with_capacity ( chunk_table. len ( ) ) ;
236239
237- for chunk_meta in chunk_table {
238- let stats = extract_chunk_stats ( store, object_meta, chunk_meta, header) . await ?;
239- builder. add_values ( & stats, chunk_meta. num_points ) ;
240+ if parallel {
241+ let stats: Vec < [ f64 ; 6 ] > = chunk_table
242+ . par_iter ( )
243+ . map ( |chunk_meta| {
244+ futures:: executor:: block_on ( extract_chunk_stats (
245+ store,
246+ object_meta,
247+ chunk_meta,
248+ header,
249+ ) )
250+ } )
251+ . collect :: < Result < Vec < [ f64 ; 6 ] > , DataFusionError > > ( ) ?;
252+
253+ for ( stat, meta) in stats. iter ( ) . zip ( chunk_table) {
254+ builder. add_values ( stat, meta. num_points ) ;
255+ }
256+ } else {
257+ for chunk_meta in chunk_table {
258+ let stats = extract_chunk_stats ( store, object_meta, chunk_meta, header) . await ?;
259+ builder. add_values ( & stats, chunk_meta. num_points ) ;
260+ }
240261 }
241262
242263 let stats = builder. finish ( ) ;
@@ -274,14 +295,14 @@ async fn extract_chunk_stats(
274295 f64:: NEG_INFINITY ,
275296 ] ;
276297
277- let extend = |stats : & mut [ f64 ; 6 ] , point : Point | {
298+ let extend = |stats : & mut [ f64 ; 6 ] , point : [ f64 ; 3 ] | {
278299 * stats = [
279- stats[ 0 ] . min ( point. x ) ,
280- stats[ 1 ] . max ( point. x ) ,
281- stats[ 2 ] . min ( point. y ) ,
282- stats[ 3 ] . max ( point. y ) ,
283- stats[ 4 ] . min ( point. z ) ,
284- stats[ 5 ] . max ( point. z ) ,
300+ stats[ 0 ] . min ( point[ 0 ] ) ,
301+ stats[ 1 ] . max ( point[ 0 ] ) ,
302+ stats[ 2 ] . min ( point[ 1 ] ) ,
303+ stats[ 3 ] . max ( point[ 1 ] ) ,
304+ stats[ 4 ] . min ( point[ 2 ] ) ,
305+ stats[ 5 ] . max ( point[ 2 ] ) ,
285306 ] ;
286307 } ;
287308
@@ -301,21 +322,29 @@ async fn extract_chunk_stats(
301322 for _ in 0 ..chunk_meta. num_points {
302323 buffer. set_position ( 0 ) ;
303324 decompressor. decompress_next ( buffer. get_mut ( ) ) ?;
304- let point = read_point ( & mut buffer, header) ?;
325+ let point = parse_coords ( & mut buffer, header) ?;
305326 extend ( & mut stats, point) ;
306327 }
307328 } else {
308329 let mut buffer = Cursor :: new ( bytes) ;
309330
310331 for _ in 0 ..chunk_meta. num_points {
311- let point = read_point ( & mut buffer, header) ?;
332+ let point = parse_coords ( & mut buffer, header) ?;
312333 extend ( & mut stats, point) ;
313334 }
314335 }
315336
316337 Ok ( stats)
317338}
318339
340+ fn parse_coords < R : Read > ( mut buffer : R , header : & Header ) -> Result < [ f64 ; 3 ] , DataFusionError > {
341+ let transforms = header. transforms ( ) ;
342+ let x = transforms. x . direct ( buffer. read_i32 :: < LittleEndian > ( ) ?) ;
343+ let y = transforms. y . direct ( buffer. read_i32 :: < LittleEndian > ( ) ?) ;
344+ let z = transforms. z . direct ( buffer. read_i32 :: < LittleEndian > ( ) ?) ;
345+ Ok ( [ x, y, z] )
346+ }
347+
319348#[ cfg( test) ]
320349mod tests {
321350 use std:: fs:: File ;
0 commit comments