@@ -37,6 +37,7 @@ use crate::expr::{Bind, BoundPredicate, Predicate};
3737use crate :: io:: FileIO ;
3838use crate :: spec:: { DataContentType , SnapshotRef } ;
3939use crate :: table:: Table ;
40+ use crate :: traced_stream:: TracedStream ;
4041use crate :: utils:: available_parallelism;
4142use crate :: { Error , ErrorKind , Result } ;
4243
@@ -330,7 +331,11 @@ pub struct TableScan {
330331impl TableScan {
331332 /// Returns a stream of [`FileScanTask`]s.
///
/// Creates a trace-level `plan_files` span for the planning work. When the
/// scan has no plan context, a debug event is logged and an empty stream is
/// returned. Otherwise the stream built by `process_manifest_contexts` is
/// boxed and wrapped in a `TracedStream` that carries a clone of the span.
///
/// # Errors
///
/// The visible part of the body has no error return of its own; any
/// failures come from the elided middle section (TODO confirm against the
/// full source).
332333 pub async fn plan_files ( & self ) -> Result < FileScanTaskStream > {
334+ let span = tracing:: trace_span!( "plan_files" ) ;
// NOTE(review): `_entered` stays alive until this `async fn` returns. If the
// elided part of the body contains an `.await`, the guard is held across a
// suspension point, which the `tracing` documentation for `Span::enter`
// warns produces incorrect traces. Consider `Instrument::instrument` or
// `Span::in_scope` instead — confirm against the full body.
335+ let _entered = span. enter ( ) ;
336+
// Without a plan context there is nothing to plan; the debug message
// attributes this case to the table having no snapshots.
333337 let Some ( plan_context) = self . plan_context . as_ref ( ) else {
338+ tracing:: debug!( "file plan requested for a table with no snapshots" ) ;
334339 return Ok ( Box :: pin ( futures:: stream:: empty ( ) ) ) ;
335340 } ;
336341
@@ -351,7 +356,7 @@ impl TableScan {
351356
// Shared via `Arc` because it is moved into the per-entry closure passed to
// `process_manifest_contexts` below.
352357 let delete_file_index = Arc :: new ( delete_file_index) ;
353358
354- Ok ( TableScan :: process_manifest_contexts (
// The boxed stream is bound to a local so it can be wrapped before returning.
359+ let stream = TableScan :: process_manifest_contexts (
355360 data_contexts,
356361 self . concurrency_limit_manifest_files ,
357362 self . concurrency_limit_manifest_entries ,
@@ -360,7 +365,9 @@ impl TableScan {
360365 async move { Self :: process_data_manifest_entry ( ctx, delete_file_index) }
361366 } ,
362367 )
363- . boxed ( ) )
368+ . boxed ( ) ;
369+
// `span` is cloned rather than moved because `_entered` still borrows it.
// Presumably `TracedStream` re-enters the span whenever the stream is
// polled — its definition is not visible here, TODO confirm.
370+ Ok ( Box :: pin ( TracedStream :: new ( stream, span. clone ( ) ) ) )
364371 }
365372
366373 /// Returns an [`ArrowRecordBatchStream`].
@@ -419,19 +426,27 @@ impl TableScan {
419426 . try_filter_map ( |opt_task| async move { Ok ( opt_task) } )
420427 }
421428
/// Decides whether one entry of a data manifest becomes a [`FileScanTask`].
///
/// Returns `Ok(None)` when the entry is filtered out (not alive, excluded by
/// the partition evaluator, or excluded by the file-metrics check),
/// `Ok(Some(task))` when it survives every filter, and an error when the
/// incoming context is already an `Err`, when the entry's content type is not
/// `DataContentType::Data`, or when one of the `?`-propagated calls fails.
///
/// Each visible skip path increments the `iceberg.scan.data_file.skipped`
/// counter with a `reason` label; the accepted path increments
/// `iceberg.scan.data_file.included`.
// `skip_all` keeps both arguments out of the span. `file_path` is declared here
// without a value and filled in by the `record` call at the top of the body.
429+ #[ tracing:: instrument( skip_all, fields( file_path) ) ]
422430 fn process_data_manifest_entry (
423431 manifest_entry_context : Result < ManifestEntryContext > ,
424432 delete_file_index : Arc < DeleteFileIndex > ,
425433 ) -> Result < Option < FileScanTask > > {
// An upstream error is propagated before `file_path` is recorded, so the span
// for a failed context carries no path.
426434 let manifest_entry_context = manifest_entry_context?;
// Attach the entry's file path to the span opened by `#[instrument]`.
435+ tracing:: Span :: current ( ) . record (
436+ "file_path" ,
437+ manifest_entry_context. manifest_entry . file_path ( ) ,
438+ ) ;
427439
428440 // skip processing this manifest entry if it has been marked as deleted
429441 if !manifest_entry_context. manifest_entry . is_alive ( ) {
442+ metrics:: counter!( "iceberg.scan.data_file.skipped" , "reason" => "not_alive" )
443+ . increment ( 1 ) ;
430444 return Ok ( None ) ;
431445 }
432446
433447 // abort the plan if we encounter a manifest entry for a delete file
434448 if manifest_entry_context. manifest_entry . content_type ( ) != DataContentType :: Data {
// The same message is emitted as an error event and returned in the `Error`,
// so callers that also log the error will see it twice.
449+ tracing:: error!( "Encountered an entry for a delete file in a data file manifest" ) ;
435450 return Err ( Error :: new (
436451 ErrorKind :: FeatureUnsupported ,
437452 "Encountered an entry for a delete file in a data file manifest" ,
@@ -455,6 +470,8 @@ impl TableScan {
455470 // skip any data file whose partition data indicates that it can't contain
456471 // any data that matches this scan's filter
457472 if !expression_evaluator. eval ( manifest_entry_context. manifest_entry . data_file ( ) ) ? {
473+ metrics:: counter!( "iceberg.scan.data_file.skipped" , "reason" => "partition" )
474+ . increment ( 1 ) ;
458475 return Ok ( None ) ;
459476 }
460477
@@ -464,30 +481,41 @@ impl TableScan {
464481 manifest_entry_context. manifest_entry . data_file ( ) ,
465482 false ,
466483 ) ? {
484+ metrics:: counter!( "iceberg.scan.data_file.skipped" , "reason" => "file_metrics" )
485+ . increment ( 1 ) ;
467486 return Ok ( None ) ;
468487 }
469488 }
470489
471490 // congratulations! the manifest entry has made its way through the
472491 // entire plan without getting filtered out. Create a corresponding
473492 // FileScanTask and push it to the result stream
// NOTE(review): the counter is bumped before `into_file_scan_task(...)?`, so an
// entry whose conversion fails is still counted as included. Consider
// incrementing only after the conversion succeeds.
493+ metrics:: counter!( "iceberg.scan.data_file.included" ) . increment ( 1 ) ;
474494 Ok ( Some (
475495 manifest_entry_context. into_file_scan_task ( delete_file_index) ?,
476496 ) )
477497 }
478498
499+ #[ tracing:: instrument( skip_all, fields( file_path) ) ]
479500 fn process_delete_manifest_entry (
480501 manifest_entry_context : Result < ManifestEntryContext > ,
481502 ) -> Result < Option < DeleteFileContext > > {
482503 let manifest_entry_context = manifest_entry_context?;
504+ tracing:: Span :: current ( ) . record (
505+ "file_path" ,
506+ manifest_entry_context. manifest_entry . file_path ( ) ,
507+ ) ;
483508
484509 // skip processing this manifest entry if it has been marked as deleted
485510 if !manifest_entry_context. manifest_entry . is_alive ( ) {
511+ metrics:: counter!( "iceberg.scan.delete_file.skipped" , "reason" => "not_alive" )
512+ . increment ( 1 ) ;
486513 return Ok ( None ) ;
487514 }
488515
489516 // abort the plan if we encounter a manifest entry that is not for a delete file
490517 if manifest_entry_context. manifest_entry . content_type ( ) == DataContentType :: Data {
518+ tracing:: error!( "Encountered an entry for a data file in a delete manifest" ) ;
491519 return Err ( Error :: new (
492520 ErrorKind :: FeatureUnsupported ,
493521 "Encountered an entry for a data file in a delete manifest" ,
@@ -506,10 +534,13 @@ impl TableScan {
506534 // skip any data file whose partition data indicates that it can't contain
507535 // any data that matches this scan's filter
508536 if !expression_evaluator. eval ( manifest_entry_context. manifest_entry . data_file ( ) ) ? {
537+ metrics:: counter!( "iceberg.scan.delete_file.skipped" , "reason" => "partition" )
538+ . increment ( 1 ) ;
509539 return Ok ( None ) ;
510540 }
511541 }
512542
543+ metrics:: counter!( "iceberg.scan.delete_file.included" ) . increment ( 1 ) ;
513544 Ok ( Some ( DeleteFileContext {
514545 manifest_entry : manifest_entry_context. manifest_entry . clone ( ) ,
515546 partition_spec_id : manifest_entry_context. partition_spec_id ,
0 commit comments