diff --git a/rust/sedona-pointcloud/src/laz/builder.rs b/rust/sedona-pointcloud/src/las/builder.rs similarity index 97% rename from rust/sedona-pointcloud/src/laz/builder.rs rename to rust/sedona-pointcloud/src/las/builder.rs index 0b39a40a9..8e2e8a852 100644 --- a/rust/sedona-pointcloud/src/laz/builder.rs +++ b/rust/sedona-pointcloud/src/las/builder.rs @@ -36,7 +36,7 @@ use geoarrow_schema::Dimension; use las::{Header, Point}; use crate::{ - laz::{metadata::ExtraAttribute, options::LasExtraBytes, schema::try_schema_from_header}, + las::{metadata::ExtraAttribute, options::LasExtraBytes, schema::try_schema_from_header}, options::GeometryEncoding, }; @@ -516,7 +516,7 @@ mod tests { use object_store::{local::LocalFileSystem, path::Path, ObjectStore}; use crate::{ - laz::{options::LasExtraBytes, reader::LazFileReaderFactory}, + las::{options::LasExtraBytes, reader::LasFileReaderFactory}, options::PointcloudOptions, }; @@ -541,15 +541,15 @@ mod tests { let location = Path::from_filesystem_path(tmp_path).unwrap(); let object = store.head(&location).await.unwrap(); - let laz_file_reader = LazFileReaderFactory::new(Arc::new(store), None) + let file_reader = LasFileReaderFactory::new(Arc::new(store), None) .create_reader( PartitionedFile::new(location, object.size), PointcloudOptions::default(), ) .unwrap(); - let metadata = laz_file_reader.get_metadata().await.unwrap(); + let metadata = file_reader.get_metadata().await.unwrap(); - let batch = laz_file_reader + let batch = file_reader .get_batch(&metadata.chunk_table[0]) .await .unwrap(); @@ -570,20 +570,20 @@ mod tests { // file with extra attributes generated with `tests/data/generate.py` let extra_path = "tests/data/extra.laz"; - // read batch with `LazFileReader` + // read batch with `LasFileReader` let store = LocalFileSystem::new(); let location = Path::from_filesystem_path(extra_path).unwrap(); let object = store.head(&location).await.unwrap(); - let laz_file_reader = LazFileReaderFactory::new(Arc::new(store), None) + let file_reader = LasFileReaderFactory::new(Arc::new(store), None) .create_reader( PartitionedFile::new(location, object.size), PointcloudOptions::default().with_las_extra_bytes(LasExtraBytes::Typed), ) .unwrap(); - let metadata = laz_file_reader.get_metadata().await.unwrap(); + let metadata = file_reader.get_metadata().await.unwrap(); - let batch = laz_file_reader + let batch = file_reader .get_batch(&metadata.chunk_table[0]) .await .unwrap(); diff --git a/rust/sedona-pointcloud/src/laz/format.rs b/rust/sedona-pointcloud/src/las/format.rs similarity index 73% rename from rust/sedona-pointcloud/src/laz/format.rs rename to rust/sedona-pointcloud/src/las/format.rs index f187a6750..48e070542 100644 --- a/rust/sedona-pointcloud/src/laz/format.rs +++ b/rust/sedona-pointcloud/src/las/format.rs @@ -34,34 +34,51 @@ use futures::{StreamExt, TryStreamExt}; use object_store::{ObjectMeta, ObjectStore}; use crate::{ - laz::{metadata::LazMetadataReader, reader::LazFileReaderFactory, source::LazSource}, + las::{metadata::LasMetadataReader, reader::LasFileReaderFactory, source::LasSource}, options::PointcloudOptions, }; -const DEFAULT_LAZ_EXTENSION: &str = ".laz"; +#[derive(Debug, Clone, Copy)] +pub enum Extension { + Las, + Laz, +} -/// Factory struct used to create [LazFormat] -#[derive(Default)] -pub struct LazFormatFactory { - // inner options for LAZ +impl Extension { + pub fn as_str(&self) -> &str { + match self { + Extension::Las => "las", + Extension::Laz => "laz", + } + } +} + +/// Factory struct used to create [LasFormat] +pub struct LasFormatFactory { + // inner options for LAS/LAZ pub options: Option, + extension: Extension, } -impl LazFormatFactory { - /// Creates an instance of [LazFormatFactory] - pub fn new() -> Self { - Self { options: None } +impl LasFormatFactory { + /// Creates an instance of [LasFormatFactory] + pub fn new(extension: Extension) -> Self { + Self { + options: None, + extension, + } } - /// Creates an instance of [LazFormatFactory] with customized default options - pub fn new_with(options: PointcloudOptions) -> Self { + /// Creates an instance of [LasFormatFactory] with customized default options + pub fn new_with(options: PointcloudOptions, extension: Extension) -> Self { Self { options: Some(options), + extension, } } } -impl FileFormatFactory for LazFormatFactory { +impl FileFormatFactory for LasFormatFactory { fn create( &self, state: &dyn Session, @@ -80,11 +97,13 @@ impl FileFormatFactory for LazFormatFactory { options.set(k, v)?; } - Ok(Arc::new(LazFormat::default().with_options(options))) + Ok(Arc::new( + LasFormat::new(self.extension).with_options(options), + )) } fn default(&self) -> Arc { - Arc::new(LazFormat::default()) + Arc::new(LasFormat::new(self.extension)) } fn as_any(&self) -> &dyn Any { @@ -92,28 +111,36 @@ impl FileFormatFactory for LazFormatFactory { } } -impl GetExt for LazFormatFactory { +impl GetExt for LasFormatFactory { fn get_ext(&self) -> String { - // Removes the dot, i.e. ".laz" -> "laz" - DEFAULT_LAZ_EXTENSION[1..].to_string() + self.extension.as_str().to_string() } } -impl fmt::Debug for LazFormatFactory { +impl fmt::Debug for LasFormatFactory { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("LazFormatFactory") - .field("LazFormatFactory", &self.options) + f.debug_struct("LasFormatFactory") + .field("options", &self.options) + .field("extension", &self.extension) .finish() } } -/// The LAZ `FileFormat` implementation -#[derive(Debug, Default)] -pub struct LazFormat { +/// The LAS/LAZ `FileFormat` implementation +#[derive(Debug)] +pub struct LasFormat { pub options: PointcloudOptions, + extension: Extension, } -impl LazFormat { +impl LasFormat { + pub fn new(extension: Extension) -> Self { + Self { + options: Default::default(), + extension, + } + } + pub fn with_options(mut self, options: PointcloudOptions) -> Self { self.options = options; self @@ -121,13 +148,13 @@ impl LazFormat { } #[async_trait::async_trait] -impl FileFormat for LazFormat { +impl FileFormat for LasFormat { fn as_any(&self) -> &dyn Any { self } fn get_ext(&self) -> String { - LazFormatFactory::new().get_ext() + LasFormatFactory::new(self.extension).get_ext() } fn get_ext_with_compression( @@ -159,7 +186,7 @@ impl FileFormat for LazFormat { .map(|object_meta| async { let loc_path = object_meta.location.clone(); - let schema = LazMetadataReader::new(store, object_meta) + let schema = LasMetadataReader::new(store, object_meta) .with_file_metadata_cache(Some(Arc::clone(&file_metadata_cache))) .with_options(self.options.clone()) .fetch_schema() @@ -193,7 +220,7 @@ impl FileFormat for LazFormat { object: &ObjectMeta, ) -> Result { let file_metadata_cache = state.runtime_env().cache_manager.get_file_metadata_cache(); - LazMetadataReader::new(store, object) + LasMetadataReader::new(store, object) .with_options(self.options.clone()) .with_file_metadata_cache(Some(Arc::clone(&file_metadata_cache))) .fetch_statistics(&table_schema) @@ -208,17 +235,17 @@ impl FileFormat for LazFormat { let mut source = conf .file_source() .as_any() - .downcast_ref::() + .downcast_ref::() .cloned() - .ok_or_else(|| DataFusionError::External("Expected LazSource".into()))?; + .ok_or_else(|| DataFusionError::External("Expected LasSource".into()))?; source = source.with_options(self.options.clone()); let metadata_cache = state.runtime_env().cache_manager.get_file_metadata_cache(); let store = state .runtime_env() .object_store(conf.object_store_url.clone())?; - let laz_reader_factory = Arc::new(LazFileReaderFactory::new(store, Some(metadata_cache))); - let source = source.with_reader_factory(laz_reader_factory); + let reader_factory = Arc::new(LasFileReaderFactory::new(store, Some(metadata_cache))); + let source = source.with_reader_factory(reader_factory); let conf = FileScanConfigBuilder::from(conf) .with_source(Arc::new(source)) @@ -228,7 +255,7 @@ impl FileFormat for LazFormat { } fn file_source(&self) -> Arc { - Arc::new(LazSource::default().with_options(self.options.clone())) + Arc::new(LasSource::new(self.extension).with_options(self.options.clone())) } } @@ -240,29 +267,48 @@ mod test { use datafusion_datasource::file_format::FileFormatFactory; use las::{point::Format, Builder, Writer}; - use crate::laz::format::{LazFormat, LazFormatFactory}; + use crate::las::format::{Extension, LasFormat, LasFormatFactory}; fn setup_context() -> SessionContext { - let file_format = Arc::new(LazFormatFactory::new()); - let mut state = SessionStateBuilder::new().build(); + + let file_format = Arc::new(LasFormatFactory::new(Extension::Las)); + state.register_file_format(file_format, true).unwrap(); + + let file_format = Arc::new(LasFormatFactory::new(Extension::Laz)); state.register_file_format(file_format, true).unwrap(); SessionContext::new_with_state(state).enable_url_table() } #[tokio::test] - async fn laz_format_factory() { + async fn format_factory() { let ctx = SessionContext::new(); - let format_factory = Arc::new(LazFormatFactory::new()); + let format_factory = Arc::new(LasFormatFactory::new(Extension::Las)); let dyn_format = format_factory .create(&ctx.state(), &HashMap::new()) .unwrap(); - assert!(dyn_format.as_any().downcast_ref::().is_some()); + assert!(dyn_format.as_any().downcast_ref::().is_some()); + + let ctx = SessionContext::new(); + let format_factory = Arc::new(LasFormatFactory::new(Extension::Laz)); + let dyn_format = format_factory + .create(&ctx.state(), &HashMap::new()) + .unwrap(); + assert!(dyn_format.as_any().downcast_ref::().is_some()); } #[tokio::test] async fn projection() { + let ctx = setup_context(); + + let df = ctx + .sql("SELECT x, y, z FROM 'tests/data/extra.las'") + .await + .unwrap(); + + assert_eq!(df.schema().fields().len(), 3); + let ctx = setup_context(); let df = ctx .sql("SELECT x, y, z FROM 'tests/data/extra.laz'") diff --git a/rust/sedona-pointcloud/src/laz/metadata.rs b/rust/sedona-pointcloud/src/las/metadata.rs similarity index 86% rename from rust/sedona-pointcloud/src/laz/metadata.rs rename to rust/sedona-pointcloud/src/las/metadata.rs index c70b74342..ab9b40ac0 100644 --- a/rust/sedona-pointcloud/src/laz/metadata.rs +++ b/rust/sedona-pointcloud/src/las/metadata.rs @@ -37,14 +37,14 @@ use laz::laszip::ChunkTable; use object_store::{ObjectMeta, ObjectStore}; use crate::{ - laz::{ + las::{ schema::try_schema_from_header, - statistics::{chunk_statistics, LazStatistics}, + statistics::{chunk_statistics, LasStatistics}, }, options::PointcloudOptions, }; -/// Laz chunk metadata +/// LAS/LAZ chunk metadata #[derive(Debug, Clone)] pub struct ChunkMeta { pub num_points: u64, @@ -52,16 +52,16 @@ pub struct ChunkMeta { pub byte_range: Range, } -/// Laz metadata +/// LAS/LAZ metadata #[derive(Debug, Clone)] -pub struct LazMetadata { +pub struct LasMetadata { pub header: Arc
, pub extra_attributes: Arc>, pub chunk_table: Vec, - pub statistics: Option, + pub statistics: Option, } -impl FileMetadata for LazMetadata { +impl FileMetadata for LasMetadata { fn as_any(&self) -> &dyn Any { self } @@ -87,15 +87,15 @@ impl FileMetadata for LazMetadata { } } -/// Reader for laz file metadata in object storage. -pub struct LazMetadataReader<'a> { +/// Reader for LAS/LAZ file metadata in object storage. +pub struct LasMetadataReader<'a> { store: &'a dyn ObjectStore, object_meta: &'a ObjectMeta, file_metadata_cache: Option>, options: PointcloudOptions, } -impl<'a> LazMetadataReader<'a> { +impl<'a> LasMetadataReader<'a> { pub fn new(store: &'a dyn ObjectStore, object_meta: &'a ObjectMeta) -> Self { Self { store, @@ -127,8 +127,8 @@ impl<'a> LazMetadataReader<'a> { .map_err(DataFusionError::External) } - /// Fetch laz metadata from the remote object store - pub async fn fetch_metadata(&self) -> Result, DataFusionError> { + /// Fetch LAS/LAZ metadata from the remote object store + pub async fn fetch_metadata(&self) -> Result, DataFusionError> { let Self { store, object_meta, @@ -142,8 +142,8 @@ impl<'a> LazMetadataReader<'a> { .and_then(|file_metadata| { file_metadata .as_any() - .downcast_ref::() - .map(|laz_file_metadata| Arc::new(laz_file_metadata.to_owned())) + .downcast_ref::() + .map(|las_file_metadata| Arc::new(las_file_metadata.to_owned())) }) { return Ok(las_file_metadata); @@ -151,7 +151,11 @@ impl<'a> LazMetadataReader<'a> { let header = self.fetch_header().await?; let extra_attributes = extra_bytes_attributes(&header)?; - let chunk_table = chunk_table(*store, object_meta, &header).await?; + let chunk_table = if header.laz_vlr().is_ok() { + laz_chunk_table(*store, object_meta, &header).await? + } else { + las_chunk_table(&header).await? + }; let statistics = if options.collect_statistics { Some( chunk_statistics( @@ -167,7 +171,7 @@ impl<'a> LazMetadataReader<'a> { None }; - let metadata = Arc::new(LazMetadata { + let metadata = Arc::new(LasMetadata { header: Arc::new(header), extra_attributes: Arc::new(extra_attributes), chunk_table, @@ -181,7 +185,7 @@ impl<'a> LazMetadataReader<'a> { Ok(metadata) } - /// Read and parse the schema of the laz file + /// Read and parse the schema of the LAS/LAZ file pub async fn fetch_schema(&mut self) -> Result { let metadata = self.fetch_metadata().await?; @@ -194,7 +198,7 @@ impl<'a> LazMetadataReader<'a> { Ok(schema) } - /// Fetch the metadata from the laz file via [`Self::fetch_metadata`] and extracts + /// Fetch the metadata from the LAS/LAZ file via [`Self::fetch_metadata`] and extracts /// the statistics in the metadata pub async fn fetch_statistics( &self, @@ -237,7 +241,7 @@ impl<'a> LazMetadataReader<'a> { } } -pub(crate) async fn fetch_header( +async fn fetch_header( store: &(impl ObjectStore + ?Sized), object_meta: &ObjectMeta, ) -> Result> { @@ -300,7 +304,8 @@ pub struct ExtraAttribute { pub offset: Option, } -pub(crate) fn extra_bytes_attributes( +/// Extract [ExtraAttribute]s from [Header] +fn extra_bytes_attributes( header: &Header, ) -> Result, Box> { let mut attributes = Vec::new(); @@ -363,19 +368,16 @@ pub(crate) fn extra_bytes_attributes( Ok(attributes) } -pub(crate) async fn chunk_table( +async fn laz_chunk_table( store: &(impl ObjectStore + ?Sized), object_meta: &ObjectMeta, header: &Header, ) -> Result, Box> { + let laz_vlr = header.laz_vlr()?; + let num_points = header.number_of_points(); let mut point_offset = 0; - - let vlr_len = header.vlrs().iter().map(|v| v.len(false)).sum::(); - let header_size = header.version().header_size() as usize + header.padding().len(); - let mut byte_offset = (header_size + vlr_len + header.vlr_padding().len()) as u64; - - let laz_vlr = header.laz_vlr()?; + let mut byte_offset = offset_to_point_data(header); let ranges = [ byte_offset..byte_offset + 8, @@ -438,6 +440,41 @@ pub(crate) async fn chunk_table( Ok(chunks) } +async fn las_chunk_table(header: &Header) -> Result, Box> { + const CHUNK_SIZE: u64 = 50000; + + let num_points = header.number_of_points(); + let mut point_offset = 0; + let mut byte_offset = offset_to_point_data(header); + let record_size = header.point_format().len() as u64; + + let num_chunks = num_points.div_ceil(CHUNK_SIZE); + let mut chunks = Vec::with_capacity(num_chunks as usize); + + for _ in 0..num_chunks { + let point_count = CHUNK_SIZE.min(num_points - point_offset); + let byte_count = point_count * record_size; + + let chunk = ChunkMeta { + num_points: point_count, + point_offset, + byte_range: byte_offset..byte_offset + byte_count, + }; + + chunks.push(chunk); + point_offset += point_count; + byte_offset += byte_count; + } + + Ok(chunks) +} + +fn offset_to_point_data(header: &Header) -> u64 { + let vlr_len = header.vlrs().iter().map(|v| v.len(false)).sum::(); + let header_size = header.version().header_size() as usize + header.padding().len(); + (header_size + vlr_len + header.vlr_padding().len()) as u64 +} + #[cfg(test)] mod tests { use std::fs::File; @@ -445,7 +482,7 @@ mod tests { use las::{point::Format, Builder, Reader, Writer}; use object_store::{local::LocalFileSystem, path::Path, ObjectStore}; - use crate::laz::metadata::LazMetadataReader; + use crate::las::metadata::LasMetadataReader; #[tokio::test] async fn header_basic_e2e() { @@ -462,11 +499,11 @@ mod tests { let mut writer = Writer::new(tmp_file, header).unwrap(); writer.close().unwrap(); - // read with `LazMetadataReader` + // read with `LasMetadataReader` let store = LocalFileSystem::new(); let location = Path::from_filesystem_path(&tmp_path).unwrap(); let object_meta = store.head(&location).await.unwrap(); - let metadata_reader = LazMetadataReader::new(&store, &object_meta); + let metadata_reader = LasMetadataReader::new(&store, &object_meta); // read with las `Reader` let reader = Reader::from_path(&tmp_path).unwrap(); diff --git a/rust/sedona-pointcloud/src/laz/mod.rs b/rust/sedona-pointcloud/src/las/mod.rs similarity index 100% rename from rust/sedona-pointcloud/src/laz/mod.rs rename to rust/sedona-pointcloud/src/las/mod.rs diff --git a/rust/sedona-pointcloud/src/laz/opener.rs b/rust/sedona-pointcloud/src/las/opener.rs similarity index 79% rename from rust/sedona-pointcloud/src/laz/opener.rs rename to rust/sedona-pointcloud/src/las/opener.rs index aa78691c8..249e53925 100644 --- a/rust/sedona-pointcloud/src/laz/opener.rs +++ b/rust/sedona-pointcloud/src/las/opener.rs @@ -30,28 +30,28 @@ use sedona_expr::spatial_filter::SpatialFilter; use sedona_geometry::bounding_box::BoundingBox; use crate::{ - laz::{ - reader::{LazFileReader, LazFileReaderFactory}, + las::{ + reader::{LasFileReader, LasFileReaderFactory}, schema::try_schema_from_header, }, options::PointcloudOptions, }; -pub struct LazOpener { +pub struct LasOpener { /// Column indexes in `table_schema` needed by the query pub projection: Arc<[usize]>, /// Optional limit on the number of rows to read pub limit: Option, pub predicate: Option>, - /// Factory for instantiating laz reader - pub laz_file_reader_factory: Arc, + /// Factory for instantiating LAS/LAZ reader + pub file_reader_factory: Arc, /// Table options pub options: PointcloudOptions, /// Target batch size pub(crate) batch_size: usize, } -impl FileOpener for LazOpener { +impl FileOpener for LasOpener { fn open(&self, file: PartitionedFile) -> Result { let projection = self.projection.clone(); let limit = self.limit; @@ -59,16 +59,16 @@ impl FileOpener for LazOpener { let predicate = self.predicate.clone(); - let laz_reader: Box = self - .laz_file_reader_factory + let file_reader: Box = self + .file_reader_factory .create_reader(file.clone(), self.options.clone())?; Ok(Box::pin(async move { - let metadata = laz_reader.get_metadata().await?; + let metadata = file_reader.get_metadata().await?; let schema = Arc::new(try_schema_from_header( &metadata.header, - laz_reader.options.geometry_encoding, - laz_reader.options.las.extra_bytes, + file_reader.options.geometry_encoding, + file_reader.options.las.extra_bytes, )?); let pruning_predicate = predicate.and_then(|physical_expr| { @@ -146,7 +146,7 @@ impl FileOpener for LazOpener { } // fetch batch - let record_batch = laz_reader.get_batch(chunk_meta).await?; + let record_batch = file_reader.get_batch(chunk_meta).await?; let num_rows = record_batch.num_rows(); row_count += num_rows; @@ -179,6 +179,52 @@ impl FileOpener for LazOpener { mod tests { use sedona::context::SedonaContext; + #[tokio::test] + async fn las_statistics_pruning() { + // file with two clusters, one at 0.5 one at 1.0 + let path = "tests/data/large.las"; + + let ctx = SedonaContext::new_local_interactive().await.unwrap(); + + // ensure no faulty chunk pruning + ctx.sql("SET pointcloud.geometry_encoding = 'plain'") + .await + .unwrap(); + ctx.sql("SET pointcloud.collect_statistics = 'true'") + .await + .unwrap(); + + let count = ctx + .sql(&format!("SELECT * FROM \"{path}\" WHERE x < 0.7")) + .await + .unwrap() + .count() + .await + .unwrap(); + assert_eq!(count, 50000); + + let count = ctx + .sql(&format!("SELECT * FROM \"{path}\" WHERE y < 0.7")) + .await + .unwrap() + .count() + .await + .unwrap(); + assert_eq!(count, 50000); + + ctx.sql("SET pointcloud.geometry_encoding = 'wkb'") + .await + .unwrap(); + let count = ctx + .sql(&format!("SELECT * FROM \"{path}\" WHERE ST_Intersects(geometry, ST_GeomFromText('POLYGON ((0 0, 0.7 0, 0.7 0.7, 0 0.7, 0 0))'))")) + .await + .unwrap() + .count() + .await + .unwrap(); + assert_eq!(count, 50000); + } + #[tokio::test] async fn laz_statistics_pruning() { // file with two clusters, one at 0.5 one at 1.0 diff --git a/rust/sedona-pointcloud/src/laz/options.rs b/rust/sedona-pointcloud/src/las/options.rs similarity index 84% rename from rust/sedona-pointcloud/src/laz/options.rs rename to rust/sedona-pointcloud/src/las/options.rs index 124b9104c..de02628fe 100644 --- a/rust/sedona-pointcloud/src/laz/options.rs +++ b/rust/sedona-pointcloud/src/las/options.rs @@ -97,13 +97,19 @@ mod test { prelude::{SessionConfig, SessionContext}, }; - use crate::{laz::format::LazFormatFactory, options::PointcloudOptions}; + use crate::{ + las::format::{Extension, LasFormatFactory}, + options::PointcloudOptions, + }; fn setup_context() -> SessionContext { - let file_format = Arc::new(LazFormatFactory::new()); - let config = SessionConfig::new().with_option_extension(PointcloudOptions::default()); let mut state = SessionStateBuilder::new().with_config(config).build(); + + let file_format = Arc::new(LasFormatFactory::new(Extension::Las)); + state.register_file_format(file_format, true).unwrap(); + + let file_format = Arc::new(LasFormatFactory::new(Extension::Laz)); state.register_file_format(file_format, true).unwrap(); SessionContext::new_with_state(state).enable_url_table() @@ -114,6 +120,13 @@ mod test { let ctx = setup_context(); // default options + let df = ctx + .sql("SELECT x, y, z FROM 'tests/data/extra.las'") + .await + .unwrap(); + + assert_eq!(df.schema().fields().len(), 3); + let df = ctx .sql("SELECT x, y, z FROM 'tests/data/extra.laz'") .await @@ -128,6 +141,14 @@ mod test { ctx.sql("SET pointcloud.las.extra_bytes = 'blob'") .await .unwrap(); + + let df = ctx + .sql("SELECT geometry, extra_bytes FROM 'tests/data/extra.las'") + .await + .unwrap(); + + assert_eq!(df.schema().fields().len(), 2); + let df = ctx .sql("SELECT geometry, extra_bytes FROM 'tests/data/extra.laz'") .await diff --git a/rust/sedona-pointcloud/src/laz/reader.rs b/rust/sedona-pointcloud/src/las/reader.rs similarity index 76% rename from rust/sedona-pointcloud/src/laz/reader.rs rename to rust/sedona-pointcloud/src/las/reader.rs index 895bc0fdb..64939a19f 100644 --- a/rust/sedona-pointcloud/src/laz/reader.rs +++ b/rust/sedona-pointcloud/src/las/reader.rs @@ -15,7 +15,11 @@ // specific language governing permissions and limitations // under the License. -use std::{io::Cursor, ops::Range, sync::Arc}; +use std::{ + io::{Cursor, Read}, + ops::Range, + sync::Arc, +}; use arrow_array::RecordBatch; use bytes::Bytes; @@ -33,22 +37,22 @@ use laz::{ use object_store::ObjectStore; use crate::{ - laz::{ + las::{ builder::RowBuilder, - metadata::{ChunkMeta, LazMetadata, LazMetadataReader}, + metadata::{ChunkMeta, LasMetadata, LasMetadataReader}, }, options::PointcloudOptions, }; -/// Laz file reader factory +/// LAS/LAZ file reader factory #[derive(Debug)] -pub struct LazFileReaderFactory { +pub struct LasFileReaderFactory { store: Arc, metadata_cache: Option>, } -impl LazFileReaderFactory { - /// Create a new `LazFileReaderFactory`. +impl LasFileReaderFactory { + /// Create a new `LasFileReaderFactory`. pub fn new( store: Arc, metadata_cache: Option>, @@ -63,8 +67,8 @@ impl LazFileReaderFactory { &self, partitioned_file: PartitionedFile, options: PointcloudOptions, - ) -> Result, DataFusionError> { - Ok(Box::new(LazFileReader { + ) -> Result, DataFusionError> { + Ok(Box::new(LasFileReader { partitioned_file, store: self.store.clone(), metadata_cache: self.metadata_cache.clone(), @@ -73,21 +77,21 @@ impl LazFileReaderFactory { } } -/// Reader for a laz file in object storage. -pub struct LazFileReader { +/// Reader for a LAS/LAZ file in object storage. +pub struct LasFileReader { partitioned_file: PartitionedFile, store: Arc, metadata_cache: Option>, pub options: PointcloudOptions, } -impl LazFileReader { - pub fn get_metadata<'a>(&'a self) -> BoxFuture<'a, Result, DataFusionError>> { +impl LasFileReader { + pub fn get_metadata<'a>(&'a self) -> BoxFuture<'a, Result, DataFusionError>> { let object_meta = self.partitioned_file.object_meta.clone(); let metadata_cache = self.metadata_cache.clone(); async move { - LazMetadataReader::new(&self.store, &object_meta) + LasMetadataReader::new(&self.store, &object_meta) .with_file_metadata_cache(metadata_cache) .with_options(self.options.clone()) .fetch_metadata() @@ -103,10 +107,6 @@ impl LazFileReader { // fetch bytes let bytes = self.get_bytes(chunk_meta.byte_range.clone()).await?; - // laz decompressor - let mut decompressor = record_decompressor(&header, bytes) - .map_err(|e| DataFusionError::External(Box::new(e)))?; - // record batch builder let num_points = chunk_meta.num_points as usize; let mut builder = RowBuilder::new(num_points, header.clone()) @@ -116,22 +116,28 @@ impl LazFileReader { self.options.las.extra_bytes, ); - // transform - let format = header.point_format(); - let transforms = header.transforms(); - - let out = vec![0; format.len() as usize]; - let mut buffer = Cursor::new(out); - - for _ in 0..chunk_meta.num_points { - buffer.set_position(0); - decompressor.decompress_next(buffer.get_mut())?; - - let point = RawPoint::read_from(&mut buffer, format) - .map(|raw_point| Point::new(raw_point, transforms)) + // parse points + if header.laz_vlr().is_ok() { + // laz decompressor + let mut decompressor = record_decompressor(&header, bytes) .map_err(|e| DataFusionError::External(Box::new(e)))?; - builder.append(point); + let out = vec![0; header.point_format().len() as usize]; + let mut buffer = Cursor::new(out); + + for _ in 0..chunk_meta.num_points { + buffer.set_position(0); + decompressor.decompress_next(buffer.get_mut())?; + let point = read_point(&mut buffer, &header)?; + builder.append(point); + } + } else { + let mut buffer = Cursor::new(bytes); + + for _ in 0..chunk_meta.num_points { + let point = read_point(&mut buffer, &header)?; + builder.append(point); + } } let struct_array = builder.finish()?; @@ -181,6 +187,12 @@ pub fn record_decompressor( Ok(decompressor) } +pub(crate) fn read_point(buffer: R, header: &Header) -> Result { + RawPoint::read_from(buffer, header.point_format()) + .map(|raw_point| Point::new(raw_point, header.transforms())) + .map_err(|e| DataFusionError::External(Box::new(e))) +} + #[cfg(test)] mod tests { use std::{fs::File, sync::Arc}; @@ -189,7 +201,7 @@ mod tests { use las::{point::Format, Builder, Writer}; use object_store::{local::LocalFileSystem, path::Path, ObjectStore}; - use crate::laz::reader::LazFileReaderFactory; + use crate::las::reader::LasFileReaderFactory; #[tokio::test] async fn reader_basic_e2e() { @@ -206,20 +218,20 @@ mod tests { writer.write_point(Default::default()).unwrap(); writer.close().unwrap(); - // read batch with `LazFileReader` + // read batch with `LasFileReader` let store = LocalFileSystem::new(); let location = Path::from_filesystem_path(tmp_path).unwrap(); let object = store.head(&location).await.unwrap(); - let laz_file_reader = LazFileReaderFactory::new(Arc::new(store), None) + let file_reader = LasFileReaderFactory::new(Arc::new(store), None) .create_reader( PartitionedFile::new(location, object.size), Default::default(), ) .unwrap(); - let metadata = laz_file_reader.get_metadata().await.unwrap(); + let metadata = file_reader.get_metadata().await.unwrap(); - let batch = laz_file_reader + let batch = file_reader .get_batch(&metadata.chunk_table[0]) .await .unwrap(); diff --git a/rust/sedona-pointcloud/src/laz/schema.rs b/rust/sedona-pointcloud/src/las/schema.rs similarity index 98% rename from rust/sedona-pointcloud/src/laz/schema.rs rename to rust/sedona-pointcloud/src/las/schema.rs index bdcca6239..c3e683944 100644 --- a/rust/sedona-pointcloud/src/laz/schema.rs +++ b/rust/sedona-pointcloud/src/las/schema.rs @@ -22,7 +22,7 @@ use geoarrow_schema::{CoordType, Crs, Dimension, Metadata, PointType, WkbType}; use las::Header; use las_crs::{get_epsg_from_geotiff_crs, get_epsg_from_wkt_crs_bytes}; -use crate::{laz::options::LasExtraBytes, options::GeometryEncoding}; +use crate::{las::options::LasExtraBytes, options::GeometryEncoding}; // Arrow schema for LAS points pub fn try_schema_from_header( diff --git a/rust/sedona-pointcloud/src/laz/source.rs b/rust/sedona-pointcloud/src/las/source.rs similarity index 82% rename from rust/sedona-pointcloud/src/laz/source.rs rename to rust/sedona-pointcloud/src/las/source.rs index 79ea2df85..004d726cd 100644 --- a/rust/sedona-pointcloud/src/laz/source.rs +++ b/rust/sedona-pointcloud/src/las/source.rs @@ -29,39 +29,53 @@ use datafusion_physical_plan::{ use object_store::ObjectStore; use crate::{ - laz::{opener::LazOpener, reader::LazFileReaderFactory}, + las::{format::Extension, opener::LasOpener, reader::LasFileReaderFactory}, options::PointcloudOptions, }; -#[derive(Clone, Default, Debug)] -pub struct LazSource { +#[derive(Clone, Debug)] +pub struct LasSource { /// Optional metrics metrics: ExecutionPlanMetricsSet, /// The schema of the file. pub(crate) table_schema: Option, /// Optional predicate for row filtering during parquet scan pub(crate) predicate: Option>, - /// Laz file reader factory - pub(crate) reader_factory: Option>, + /// LAS/LAZ file reader factory + pub(crate) reader_factory: Option>, /// Batch size configuration pub(crate) batch_size: Option, pub(crate) projected_statistics: Option, pub(crate) options: PointcloudOptions, + pub(crate) extension: Extension, } -impl LazSource { +impl LasSource { + pub fn new(extension: Extension) -> Self { + Self { + metrics: Default::default(), + table_schema: Default::default(), + predicate: Default::default(), + reader_factory: Default::default(), + batch_size: Default::default(), + projected_statistics: Default::default(), + options: Default::default(), + extension, + } + } + pub fn with_options(mut self, options: PointcloudOptions) -> Self { self.options = options; self } - pub fn with_reader_factory(mut self, reader_factory: Arc) -> Self { + pub fn with_reader_factory(mut self, reader_factory: Arc) -> Self { self.reader_factory = Some(reader_factory); self } } -impl FileSource for LazSource { +impl FileSource for LasSource { fn create_file_opener( &self, object_store: Arc, @@ -72,17 +86,17 @@ impl FileSource for LazSource { .file_column_projection_indices() .unwrap_or_else(|| (0..base_config.projected_file_schema().fields().len()).collect()); - let laz_file_reader_factory = self + let file_reader_factory = self .reader_factory .clone() - .unwrap_or_else(|| Arc::new(LazFileReaderFactory::new(object_store, None))); + .unwrap_or_else(|| Arc::new(LasFileReaderFactory::new(object_store, None))); - Arc::new(LazOpener { + Arc::new(LasOpener { projection: Arc::from(projection), batch_size: self.batch_size.expect("Must be set"), limit: base_config.limit, predicate: self.predicate.clone(), - laz_file_reader_factory, + file_reader_factory, options: self.options.clone(), }) } @@ -132,7 +146,7 @@ impl FileSource for LazSource { } fn file_type(&self) -> &str { - "laz" + self.extension.as_str() } fn try_pushdown_filters( diff --git a/rust/sedona-pointcloud/src/laz/statistics.rs b/rust/sedona-pointcloud/src/las/statistics.rs similarity index 78% rename from rust/sedona-pointcloud/src/laz/statistics.rs rename to rust/sedona-pointcloud/src/las/statistics.rs index 75e38a3b8..87f11abd6 100644 --- a/rust/sedona-pointcloud/src/laz/statistics.rs +++ b/rust/sedona-pointcloud/src/las/statistics.rs @@ -27,22 +27,25 @@ use arrow_ipc::{reader::FileReader, writer::FileWriter}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{arrow::compute::concat_batches, Column, DataFusionError, ScalarValue}; use datafusion_pruning::PruningStatistics; -use las::{raw::Point as RawPoint, Header, Point}; +use las::{Header, Point}; use object_store::{path::Path, ObjectMeta, ObjectStore, PutPayload}; use sedona_geometry::bounding_box::BoundingBox; -use crate::laz::{metadata::ChunkMeta, reader::record_decompressor}; +use crate::las::{ + metadata::ChunkMeta, + reader::{read_point, record_decompressor}, +}; -/// Spatial statistics (extent) of LAZ chunks for pruning. +/// Spatial statistics (extent) of LAS/LAZ chunks for pruning. /// /// It wraps a `RecordBatch` with x, y, z min and max values and row count per chunk. #[derive(Clone, Debug)] -pub struct LazStatistics { +pub struct LasStatistics { pub values: RecordBatch, } -impl LazStatistics { +impl LasStatistics { /// Get the [BoundingBox] of a chunk by index. pub fn get_bbox(&self, index: usize) -> Option { if index >= self.values.num_rows() { @@ -93,7 +96,7 @@ impl LazStatistics { } } -impl PruningStatistics for LazStatistics { +impl PruningStatistics for LasStatistics { fn min_values(&self, column: &Column) -> Option { match column.name.as_str() { "x" => self.values.column_by_name("x_min").cloned(), @@ -165,7 +168,7 @@ impl LasStatisticsBuilder { self.row_counts.append_value(row_count); } - pub fn finish(mut self) -> LazStatistics { + pub fn finish(mut self) -> LasStatistics { let schema = Schema::new([ Arc::new(Field::new("x_min", DataType::Float64, false)), Arc::new(Field::new("x_max", DataType::Float64, false)), @@ -190,11 +193,11 @@ impl LasStatisticsBuilder { ) .unwrap(); - LazStatistics { values: batch } + LasStatistics { values: batch } } } -/// Extract the [LazStatistics] from a LAZ file in an object store. +/// Extract the [LasStatistics] from a LAS/LAZ file in an object store. /// /// This will scan the entire file. To reuse the statistics, they can /// optionally be persisted, which creates a sidecar file with a `.stats` @@ -205,7 +208,7 @@ pub async fn chunk_statistics( chunk_table: &[ChunkMeta], header: &Header, persist: bool, -) -> Result { +) -> Result { let stats_path = Path::parse(format!("{}.stats", object_meta.location.as_ref()))?; match store.head(&stats_path).await { @@ -225,7 +228,7 @@ pub async fn chunk_statistics( assert_eq!(values.num_rows(), chunk_table.len()); - Ok(LazStatistics { values }) + Ok(LasStatistics { values }) } Err(object_store::Error::NotFound { path: _, source: _ }) => { // extract statistics @@ -261,15 +264,6 @@ async fn extract_chunk_stats( chunk_meta: &ChunkMeta, header: &Header, ) -> Result<[f64; 6], DataFusionError> { - // fetch chunk bytes - let bytes = store - .get_range(&object_meta.location, chunk_meta.byte_range.clone()) - .await?; - - // setup laz decompressor - let mut decompressor = - record_decompressor(header, bytes).map_err(|e| DataFusionError::External(Box::new(e)))?; - // statistics let mut stats = [ f64::INFINITY, @@ -280,18 +274,8 @@ async fn extract_chunk_stats( f64::NEG_INFINITY, ]; - let out = vec![0; header.point_format().len() as usize]; - let mut buffer = Cursor::new(out); - - for _ in 0..chunk_meta.num_points { - buffer.set_position(0); - decompressor.decompress_next(buffer.get_mut())?; - - let point = RawPoint::read_from(&mut buffer, header.point_format()) - .map(|raw_point| Point::new(raw_point, header.transforms())) - .map_err(|e| DataFusionError::External(Box::new(e)))?; - - stats = [ + let extend = |stats: &mut [f64; 6], point: Point| { + *stats = [ stats[0].min(point.x), stats[1].max(point.x), stats[2].min(point.y), @@ -299,6 +283,34 @@ async fn extract_chunk_stats( stats[4].min(point.z), stats[5].max(point.z), ]; + }; + + // fetch chunk bytes + let bytes = store + .get_range(&object_meta.location, chunk_meta.byte_range.clone()) + .await?; + + if header.laz_vlr().is_ok() { + // setup laz decompressor + let mut decompressor = record_decompressor(header, bytes) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + + let out = vec![0; header.point_format().len() as usize]; + let mut buffer = Cursor::new(out); + + for _ in 0..chunk_meta.num_points { + buffer.set_position(0); + decompressor.decompress_next(buffer.get_mut())?; + let point = read_point(&mut buffer, header)?; + extend(&mut stats, point); + } + } else { + let mut buffer = Cursor::new(bytes); + + for _ in 0..chunk_meta.num_points { + let point = read_point(&mut buffer, header)?; + extend(&mut stats, point); + } } Ok(stats) @@ -315,55 +327,56 @@ mod tests { use object_store::{local::LocalFileSystem, path::Path, ObjectStore}; use sedona_geometry::bounding_box::BoundingBox; - use crate::{laz::metadata::LazMetadataReader, options::PointcloudOptions}; + use crate::{las::metadata::LasMetadataReader, options::PointcloudOptions}; #[tokio::test] async fn chunk_statistics() { - let path = "tests/data/large.laz"; - - // read with `LazMetadataReader` - let store = LocalFileSystem::new(); - let location = Path::from_filesystem_path(path).unwrap(); - let object_meta = store.head(&location).await.unwrap(); - - let metadata_reader = LazMetadataReader::new(&store, &object_meta); - let metadata = metadata_reader.fetch_metadata().await.unwrap(); - assert!(metadata.statistics.is_none()); - - let options = PointcloudOptions { - collect_statistics: true, - ..Default::default() - }; - let metadata_reader = LazMetadataReader::new(&store, &object_meta).with_options(options); - let metadata = metadata_reader.fetch_metadata().await.unwrap(); - let statistics = metadata.statistics.as_ref().unwrap(); - assert_eq!(statistics.num_containers(), 2); - assert_eq!( - statistics - .row_counts(&Column::from_name("")) - .unwrap() - .as_primitive::() - .value(0), - 50000 - ); - assert_eq!( - statistics.get_bbox(0), - Some(BoundingBox::xyzm( - (0.5, 0.5), - (0.5, 0.5), - Some((0.5, 0.5).into()), - None - )) - ); - assert_eq!( - statistics.get_bbox(1), - Some(BoundingBox::xyzm( - (1.0, 1.0), - (1.0, 1.0), - Some((1.0, 1.0).into()), - None - )) - ); + for path in ["tests/data/large.las", "tests/data/large.laz"] { + // read with `LasMetadataReader` + let store = LocalFileSystem::new(); + let location = Path::from_filesystem_path(path).unwrap(); + let object_meta = store.head(&location).await.unwrap(); + + let metadata_reader = LasMetadataReader::new(&store, &object_meta); + let metadata = metadata_reader.fetch_metadata().await.unwrap(); + assert!(metadata.statistics.is_none()); + + let options = PointcloudOptions { + collect_statistics: true, + ..Default::default() + }; + let metadata_reader = + LasMetadataReader::new(&store, &object_meta).with_options(options); + let metadata = metadata_reader.fetch_metadata().await.unwrap(); + let statistics = metadata.statistics.as_ref().unwrap(); + assert_eq!(statistics.num_containers(), 2); + assert_eq!( + statistics + .row_counts(&Column::from_name("")) + .unwrap() + .as_primitive::() + .value(0), + 50000 + ); + assert_eq!( + statistics.get_bbox(0), + Some(BoundingBox::xyzm( + (0.5, 0.5), + (0.5, 0.5), + Some((0.5, 0.5).into()), + None + )) + ); + assert_eq!( + statistics.get_bbox(1), + Some(BoundingBox::xyzm( + (1.0, 1.0), + (1.0, 1.0), + Some((1.0, 1.0).into()), + None + )) + ); + } } #[tokio::test] @@ -388,7 +401,7 @@ mod tests { writer.write_point(point).unwrap(); writer.close().unwrap(); - // read with `LazMetadataReader` + // read with `LasMetadataReader` let store = LocalFileSystem::new(); let location = Path::from_filesystem_path(&tmp_path).unwrap(); let object_meta = store.head(&location).await.unwrap(); @@ -398,7 +411,7 @@ mod tests { persist_statistics: true, ..Default::default() }; - let metadata_reader = LazMetadataReader::new(&store, &object_meta).with_options(options); + let metadata_reader = LasMetadataReader::new(&store, &object_meta).with_options(options); let metadata = metadata_reader.fetch_metadata().await.unwrap(); assert!(tmp_path.with_extension("laz.stats").exists()); diff --git a/rust/sedona-pointcloud/src/lib.rs b/rust/sedona-pointcloud/src/lib.rs index 5c9acc713..7a75e0410 100644 --- a/rust/sedona-pointcloud/src/lib.rs +++ b/rust/sedona-pointcloud/src/lib.rs @@ -15,5 +15,5 @@ // specific language governing permissions and limitations // under the License. -pub mod laz; +pub mod las; pub mod options; diff --git a/rust/sedona-pointcloud/src/options.rs b/rust/sedona-pointcloud/src/options.rs index c8d2fd903..51e5067bb 100644 --- a/rust/sedona-pointcloud/src/options.rs +++ b/rust/sedona-pointcloud/src/options.rs @@ -23,7 +23,7 @@ use datafusion_common::{ extensions_options, }; -use crate::laz::options::{LasExtraBytes, LasOptions}; +use crate::las::options::{LasExtraBytes, LasOptions}; /// Geometry representation #[derive(Clone, Copy, Default, PartialEq, Eq, Debug)] diff --git a/rust/sedona-pointcloud/tests/data/extra.las b/rust/sedona-pointcloud/tests/data/extra.las new file mode 100644 index 000000000..42b635281 Binary files /dev/null and b/rust/sedona-pointcloud/tests/data/extra.las differ diff --git a/rust/sedona-pointcloud/tests/data/extra.laz b/rust/sedona-pointcloud/tests/data/extra.laz index 016c6c295..9b6d45cbb 100644 Binary files a/rust/sedona-pointcloud/tests/data/extra.laz and b/rust/sedona-pointcloud/tests/data/extra.laz differ diff --git a/rust/sedona-pointcloud/tests/data/generate.py b/rust/sedona-pointcloud/tests/data/generate.py index 0d4cc5222..1b9f367a8 100644 --- a/rust/sedona-pointcloud/tests/data/generate.py +++ b/rust/sedona-pointcloud/tests/data/generate.py @@ -29,24 +29,19 @@ DATA_DIR = Path(__file__).resolve().parent - - LAS_VERSIONS = [f"1.{p}" for p in range(5)] # 1.0 - 1.4 POINT_FORMAT = list(range(11)) # 0 - 10 (>= 6 for LAS 1.4+) -# Pragmatic choice -version = LAS_VERSIONS[4] -point_format = POINT_FORMAT[6] -# Header -header = laspy.LasHeader(point_format=point_format, version=version) +# ----------------------------------------------------------------------------- +# Extra attribute test file with a single point (extra.las/extra.laz) +# ----------------------------------------------------------------------------- +# header +header = laspy.LasHeader(point_format=POINT_FORMAT[6], version=LAS_VERSIONS[4]) header.offsets = np.array([1.0, 1.0, 1.0]) header.scales = np.array([0.1, 0.1, 0.1]) - -# ----------------------------------------------------------------------------- -# Extra attribute test file with a single point (extra.laz) -# ----------------------------------------------------------------------------- +# extra attributes DATA_TYPES = [ "uint8", "int8", @@ -59,8 +54,6 @@ "float32", "float64", ] - -# Extra attributes for dt in DATA_TYPES: name = f"{dt}_plain" header.add_extra_dim(laspy.point.format.ExtraBytesParams(name, dt, "", None, None)) @@ -75,41 +68,55 @@ laspy.point.format.ExtraBytesParams(name, dt, "", None, None, [42]) ) -# Write laz with one point -with laspy.open( - DATA_DIR.joinpath("extra.laz"), mode="w", header=header, do_compress=True -) as writer: - point_record = laspy.ScaleAwarePointRecord.zeros(point_count=1, header=header) - point_record.x = [0.5] - point_record.y = [0.5] - point_record.z = [0.5] +point_record = laspy.ScaleAwarePointRecord.zeros(point_count=1, header=header) +point_record.x = [0.5] +point_record.y = [0.5] +point_record.z = [0.5] - for dt in DATA_TYPES: - name = f"{dt}_plain" - point_record[name] = [21] +for dt in DATA_TYPES: + name = f"{dt}_plain" + point_record[name] = [21] + + name = f"{dt}_scaled" + point_record[name] = [21] - name = f"{dt}_scaled" - point_record[name] = [21] + name = f"{dt}_nodata" + point_record[name] = [42] - name = f"{dt}_nodata" - point_record[name] = [42] +# write las with one point +with laspy.open(DATA_DIR.joinpath("extra.las"), mode="w", header=header) as writer: + writer.write_points(point_record) +# write laz with one point +with laspy.open( + DATA_DIR.joinpath("extra.laz"), mode="w", header=header, do_compress=True +) as writer: writer.write_points(point_record) # ----------------------------------------------------------------------------- -# Large test file to evaluate pruning (large.laz) +# Large test file to evaluate pruning (large.las/large.laz) # ----------------------------------------------------------------------------- -with laspy.open( - DATA_DIR.joinpath("large.laz"), mode="w", header=header, do_compress=True -) as writer: - N = 100000 +# header +header = laspy.LasHeader(point_format=POINT_FORMAT[6], version=LAS_VERSIONS[4]) +header.offsets = np.array([1.0, 1.0, 1.0]) +header.scales = np.array([0.1, 0.1, 0.1]) - point_record = laspy.ScaleAwarePointRecord.zeros(point_count=N, header=header) +# points +N = 100000 +point_record = laspy.ScaleAwarePointRecord.zeros(point_count=N, header=header) - # create two distinct chunks - point_record.x = [0.5] * int(N / 2) + [1] * int(N / 2) - point_record.y = [0.5] * int(N / 2) + [1] * int(N / 2) - point_record.z = [0.5] * int(N / 2) + [1] * int(N / 2) +# create two distinct chunks +point_record.x = [0.5] * int(N / 2) + [1] * int(N / 2) +point_record.y = [0.5] * int(N / 2) + [1] * int(N / 2) +point_record.z = [0.5] * int(N / 2) + [1] * int(N / 2) + +# write las file +with laspy.open(DATA_DIR.joinpath("large.las"), mode="w", header=header) as writer: + writer.write_points(point_record) +# write laz file +with laspy.open( + DATA_DIR.joinpath("large.laz"), mode="w", header=header, do_compress=True +) as writer: writer.write_points(point_record) diff --git a/rust/sedona-pointcloud/tests/data/large.las b/rust/sedona-pointcloud/tests/data/large.las new file mode 100644 index 000000000..e51f717fb Binary files /dev/null and b/rust/sedona-pointcloud/tests/data/large.las differ diff --git a/rust/sedona-pointcloud/tests/data/large.laz b/rust/sedona-pointcloud/tests/data/large.laz index e4cb06db7..2dc07bb59 100644 Binary files a/rust/sedona-pointcloud/tests/data/large.laz and b/rust/sedona-pointcloud/tests/data/large.laz differ diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs index ee2c96355..218ad6ebc 100644 --- a/rust/sedona/src/context.rs +++ b/rust/sedona/src/context.rs @@ -58,7 +58,10 @@ use sedona_geoparquet::{ }; #[cfg(feature = "pointcloud")] use sedona_pointcloud::{ - laz::{format::LazFormatFactory, options::LasExtraBytes}, + las::{ + format::{Extension, LasFormatFactory}, + options::LasExtraBytes, + }, options::{GeometryEncoding, PointcloudOptions}, }; @@ -124,7 +127,8 @@ impl SedonaContext { state.register_file_format(Arc::new(GeoParquetFormatFactory::new()), true)?; #[cfg(feature = "pointcloud")] { - state.register_file_format(Arc::new(LazFormatFactory::new()), false)?; + state.register_file_format(Arc::new(LasFormatFactory::new(Extension::Laz)), false)?; + state.register_file_format(Arc::new(LasFormatFactory::new(Extension::Las)), false)?; } // Enable dynamic file query (i.e., select * from 'filename')