lance-format · Xuanwo · Jan 12, 2026 · Jan 12, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -228,7 +228,6 @@ redundant_pub_crate = "deny"
 string_add_assign = "deny"
 string_add = "deny"
 string_lit_as_bytes = "deny"
-string_to_string = "deny"
 use_self = "deny"
 dbg_macro = "deny"
 trait_duplication_in_bounds = "deny"

diff --git a/python/python/tests/test_ingestion.py b/python/python/tests/test_ingestion.py
@@ -14,7 +14,7 @@ def can_write(data, dataset, schema=None):
         lance.write_dataset(pa.table(data, schema=schema), dataset.uri, mode="append")
 
     def cannot_write(data, dataset, schema=None):
-        with pytest.raises(Exception, match="contained null values"):
+        with pytest.raises(Exception, match=r"contain(ed|s) null values"):
             can_write(data, dataset, schema)
 
     nullable_dataset = lance.write_dataset(

diff --git a/rust/compression/bitpacking/src/lib.rs b/rust/compression/bitpacking/src/lib.rs
@@ -15,7 +15,6 @@
 
 use arrayref::{array_mut_ref, array_ref};
 use core::mem::size_of;
-use paste::paste;
 
 pub const FL_ORDER: [usize; 8] = [0, 4, 2, 6, 1, 5, 3, 7];
 

diff --git a/rust/examples/src/hnsw.rs b/rust/examples/src/hnsw.rs
@@ -79,15 +79,14 @@ async fn main() {
     let max_level = 7;
 
     // 1. Generate a synthetic test data of specified dimensions
-    let dataset = if uri.is_none() {
-        println!("No uri is provided, generating test dataset...");
-        let output = "test_vectors.lance";
-        create_test_vector_dataset(output, 1000, 64).await;
-        Dataset::open(output).await.expect("Failed to open dataset")
-    } else {
-        Dataset::open(uri.as_ref().unwrap())
-            .await
-            .expect("Failed to open dataset")
+    let dataset = match uri.as_deref() {
+        None => {
+            println!("No uri is provided, generating test dataset...");
+            let output = "test_vectors.lance";
+            create_test_vector_dataset(output, 1000, 64).await;
+            Dataset::open(output).await.expect("Failed to open dataset")
+        }
+        Some(uri) => Dataset::open(uri).await.expect("Failed to open dataset"),
     };
 
     println!("Dataset schema: {:#?}", dataset.schema());

diff --git a/rust/lance-core/src/datatypes/field.rs b/rust/lance-core/src/datatypes/field.rs
@@ -151,7 +151,7 @@ pub struct Field {
     pub encoding: Option<Encoding>,
     pub nullable: bool,
 
-    pub children: Vec<Field>,
+    pub children: Vec<Self>,
 
     /// Dictionary value array if this field is dictionary.
     pub dictionary: Option<Dictionary>,

diff --git a/rust/lance-core/src/datatypes/schema.rs b/rust/lance-core/src/datatypes/schema.rs
@@ -1533,7 +1533,7 @@ pub fn parse_field_path(path: &str) -> Result<Vec<String>> {
 /// For example: ["parent", "child.with.dot"] formats to "parent.`child.with.dot`"
 /// For example: ["meta-data", "user-id"] formats to "`meta-data`.`user-id`"
 /// Backticks in field names are escaped by doubling them.
-/// For example: ["field`with`backticks"] formats to "`field``with``backticks`"
+/// For example: \["field`with`backticks"\] formats to "`field``with``backticks`"
 pub fn format_field_path(fields: &[&str]) -> String {
     fields
         .iter()

diff --git a/rust/lance-core/src/utils/deletion.rs b/rust/lance-core/src/utils/deletion.rs
@@ -12,8 +12,9 @@ const BITMAP_THRESDHOLD: usize = 5_000;
 // TODO: Benchmark to find a better value.
 
 /// Represents a set of deleted row offsets in a single fragment.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub enum DeletionVector {
+    #[default]
     NoDeletions,
     Set(HashSet<u32>),
     Bitmap(RoaringBitmap),
@@ -182,12 +183,6 @@ impl OffsetMapper {
     }
 }
 
-impl Default for DeletionVector {
-    fn default() -> Self {
-        Self::NoDeletions
-    }
-}
-
 impl From<&DeletionVector> for RoaringBitmap {
     fn from(value: &DeletionVector) -> Self {
         match value {

diff --git a/rust/lance-datafusion/src/substrait.rs b/rust/lance-datafusion/src/substrait.rs
@@ -82,11 +82,17 @@ fn remove_extension_types(
     for (substrait_field, arrow_field) in fields.types.iter().zip(arrow_schema.fields.iter()) {
         let num_fields = count_fields(substrait_field);
 
+        let kind = substrait_field.kind.as_ref().unwrap();
+        let is_user_defined = match kind {
+            Kind::UserDefined(_) => true,
+            // Keep compatibility with older Substrait plans.
+            #[allow(deprecated)]
+            Kind::UserDefinedTypeReference(_) => true,
+            _ => false,
+        };
+
         if !substrait_schema.names[field_index].starts_with("__unlikely_name_placeholder")
-            && !matches!(
-                substrait_field.kind.as_ref().unwrap(),
-                Kind::UserDefined(_) | Kind::UserDefinedTypeReference(_)
-            )
+            && !is_user_defined
         {
             kept_substrait_fields.push(substrait_field.clone());
             kept_arrow_fields.push(arrow_field.clone());
@@ -118,10 +124,10 @@ fn remove_extension_types(
 fn remap_expr_references(expr: &mut Expression, mapping: &HashMap<usize, usize>) -> Result<()> {
     match expr.rex_type.as_mut().unwrap() {
         // Simple, no field references possible
-        RexType::Literal(_)
-        | RexType::Nested(_)
-        | RexType::Enum(_)
-        | RexType::DynamicParameter(_) => Ok(()),
+        RexType::Literal(_) | RexType::Nested(_) | RexType::DynamicParameter(_) => Ok(()),
+        // Enum literals are deprecated in Substrait and should only appear in older plans.
+        #[allow(deprecated)]
+        RexType::Enum(_) => Ok(()),
         // Complex operators not supported in filters
         RexType::WindowFunction(_) | RexType::Subquery(_) => Err(Error::invalid_input(
             "Window functions or subqueries not allowed in filter expression",

diff --git a/rust/lance-index/src/scalar/btree.rs b/rust/lance-index/src/scalar/btree.rs
@@ -1755,19 +1755,21 @@ pub async fn train_btree_index(
         Field::new(BTREE_IDS_COLUMN, DataType::UInt64, false),
     ]));
 
-    let mut sub_index_file;
-    if partition_id.is_none() {
-        sub_index_file = index_store
-            .new_index_file(BTREE_PAGES_NAME, flat_schema.clone())
-            .await?;
-    } else {
-        sub_index_file = index_store
-            .new_index_file(
-                part_page_data_file_path(partition_id.unwrap()).as_str(),
-                flat_schema.clone(),
-            )
-            .await?;
-    }
+    let mut sub_index_file = match partition_id {
+        None => {
+            index_store
+                .new_index_file(BTREE_PAGES_NAME, flat_schema.clone())
+                .await?
+        }
+        Some(partition_id) => {
+            index_store
+                .new_index_file(
+                    part_page_data_file_path(partition_id).as_str(),
+                    flat_schema.clone(),
+                )
+                .await?
+        }
+    };
 
     let mut encoded_batches = Vec::new();
     let mut batch_idx = 0;
@@ -1802,19 +1804,21 @@ pub async fn train_btree_index(
         RANGE_PARTITIONED_META_KEY.to_string(),
         range_id.is_some().to_string(),
     );
-    let mut btree_index_file;
-    if partition_id.is_none() {
-        btree_index_file = index_store
-            .new_index_file(BTREE_LOOKUP_NAME, Arc::new(file_schema))
-            .await?;
-    } else {
-        btree_index_file = index_store
-            .new_index_file(
-                part_lookup_file_path(partition_id.unwrap()).as_str(),
-                Arc::new(file_schema),
-            )
-            .await?;
-    }
+    let mut btree_index_file = match partition_id {
+        None => {
+            index_store
+                .new_index_file(BTREE_LOOKUP_NAME, Arc::new(file_schema))
+                .await?
+        }
+        Some(partition_id) => {
+            index_store
+                .new_index_file(
+                    part_lookup_file_path(partition_id).as_str(),
+                    Arc::new(file_schema),
+                )
+                .await?
+        }
+    };
     btree_index_file.write_record_batch(record_batch).await?;
     btree_index_file.finish().await?;
     Ok(())

diff --git a/rust/lance-index/src/scalar/expression.rs b/rust/lance-index/src/scalar/expression.rs
@@ -965,9 +965,9 @@ impl PartialEq for ScalarIndexSearch {
 /// modify the results of scalar lookups
 #[derive(Debug, Clone)]
 pub enum ScalarIndexExpr {
-    Not(Box<ScalarIndexExpr>),
-    And(Box<ScalarIndexExpr>, Box<ScalarIndexExpr>),
-    Or(Box<ScalarIndexExpr>, Box<ScalarIndexExpr>),
+    Not(Box<Self>),
+    And(Box<Self>, Box<Self>),
+    Or(Box<Self>, Box<Self>),
     Query(ScalarIndexSearch),
 }
 

diff --git a/rust/lance-index/src/scalar/inverted/query.rs b/rust/lance-index/src/scalar/inverted/query.rs
@@ -71,18 +71,13 @@ impl Default for FtsSearchParams {
     }
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
 pub enum Operator {
     And,
+    #[default]
     Or,
 }
 
-impl Default for Operator {
-    fn default() -> Self {
-        Self::Or
-    }
-}
-
 impl TryFrom<&str> for Operator {
     type Error = Error;
     fn try_from(value: &str) -> Result<Self> {

diff --git a/rust/lance-io/src/lib.rs b/rust/lance-io/src/lib.rs
@@ -27,13 +27,14 @@ pub mod utils;
 pub use scheduler::{bytes_read_counter, iops_counter};
 
 /// Defines a selection of rows to read from a file/batch
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Default)]
 pub enum ReadBatchParams {
     /// Select a contiguous range of rows
     Range(Range<usize>),
     /// Select multiple contiguous ranges of rows
     Ranges(Arc<[Range<u64>]>),
     /// Select all rows (this is the default)
+    #[default]
     RangeFull,
     /// Select all rows up to a given index
     RangeTo(RangeTo<usize>),
@@ -77,13 +78,6 @@ impl std::fmt::Display for ReadBatchParams {
     }
 }
 
-impl Default for ReadBatchParams {
-    fn default() -> Self {
-        // Default of ReadBatchParams is reading the full batch.
-        Self::RangeFull
-    }
-}
-
 impl From<&[u32]> for ReadBatchParams {
     fn from(value: &[u32]) -> Self {
         Self::Indices(UInt32Array::from_iter_values(value.iter().copied()))

diff --git a/rust/lance/src/dataset/write/merge_insert/inserted_rows.rs b/rust/lance/src/dataset/write/merge_insert/inserted_rows.rs
@@ -27,7 +27,7 @@ pub enum KeyValue {
     Int64(i64),
     UInt64(u64),
     Binary(Vec<u8>),
-    Composite(Vec<KeyValue>),
+    Composite(Vec<Self>),
 }
 
 impl KeyValue {

diff --git a/rust/lance/src/index/scalar.rs b/rust/lance/src/index/scalar.rs
@@ -272,18 +272,19 @@ pub(super) async fn build_scalar_index(
     let training_request =
         plugin.new_training_request(params.params.as_deref().unwrap_or("{}"), &field)?;
 
-    let training_data = if preprocessed_data.is_none() {
-        load_training_data(
-            dataset,
-            column,
-            training_request.criteria(),
-            None,
-            train,
-            fragment_ids.clone(),
-        )
-        .await?
-    } else {
-        preprocessed_data.unwrap()
+    let training_data = match preprocessed_data {
+        Some(preprocessed_data) => preprocessed_data,
+        None => {
+            load_training_data(
+                dataset,
+                column,
+                training_request.criteria(),
+                None,
+                train,
+                fragment_ids.clone(),
+            )
+            .await?
+        }
     };
 
     plugin

diff --git a/rust/lance/src/index/vector/ivf.rs b/rust/lance/src/index/vector/ivf.rs
@@ -1231,8 +1231,7 @@ pub async fn build_ivf_model(
 ) -> Result<IvfModel> {
     let num_partitions = params.num_partitions.unwrap();
     let centroids = params.centroids.clone();
-    if centroids.is_some() && !params.retrain {
-        let centroids = centroids.unwrap();
+    if let (Some(centroids), false) = (centroids.as_deref(), params.retrain) {
         info!("Pre-computed IVF centroids is provided, skip IVF training");
         if centroids.values().len() != num_partitions * dim {
             return Err(Error::Index {
@@ -1244,7 +1243,7 @@ pub async fn build_ivf_model(
                 location: location!(),
             });
         }
-        return Ok(IvfModel::new(centroids.as_ref().clone(), None));
+        return Ok(IvfModel::new(centroids.clone(), None));
     }
     let sample_size_hint = num_partitions * params.sample_rate;
 

diff --git a/rust/lance/src/io/exec/projection.rs b/rust/lance/src/io/exec/projection.rs
@@ -152,7 +152,7 @@ pub enum Selection<'a> {
     /// Selects this fields and all subfields
     FullField(&'a str),
     /// For a struct, selections of subfields
-    StructProjection(&'a str, Vec<Selection<'a>>),
+    StructProjection(&'a str, Vec<Self>),
 }
 
 impl Selection<'_> {