From 0b0977f9e953f8838676b1130ceccea762f1d6e4 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 12 Jan 2026 13:37:37 -0800 Subject: [PATCH 1/4] fix: project_by_schema now reorders fields inside List types Previously, project_by_schema only recursively handled direct Struct fields. List, LargeList, and FixedSizeList types fell through to the default case which cloned them without reordering inner struct fields. This caused Arrow validation errors when reading fragments where fields were stored out of order (scrambled `fields` array in DataFile metadata) combined with schema evolution requiring null-filling. Fixes #5702 Co-Authored-By: Claude Opus 4.5 --- rust/lance-arrow/src/lib.rs | 296 ++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 10 deletions(-) diff --git a/rust/lance-arrow/src/lib.rs b/rust/lance-arrow/src/lib.rs index 97738938ef2..83b8b65d954 100644 --- a/rust/lance-arrow/src/lib.rs +++ b/rust/lance-arrow/src/lib.rs @@ -795,6 +795,49 @@ impl RecordBatchExt for RecordBatch { } } +/// Recursively projects an array to match the target field's structure. +/// This handles reordering fields inside nested List types. 
+fn project_array(array: &ArrayRef, target_field: &Field) -> Result { + match target_field.data_type() { + DataType::Struct(subfields) => { + let struct_arr = array.as_struct(); + let projected = project(struct_arr, subfields)?; + Ok(Arc::new(projected)) + } + DataType::List(inner_field) => { + let list_arr: &ListArray = array.as_list(); + let projected_values = project_array(list_arr.values(), inner_field.as_ref())?; + Ok(Arc::new(ListArray::new( + inner_field.clone(), + list_arr.offsets().clone(), + projected_values, + list_arr.nulls().cloned(), + ))) + } + DataType::LargeList(inner_field) => { + let list_arr: &LargeListArray = array.as_list(); + let projected_values = project_array(list_arr.values(), inner_field.as_ref())?; + Ok(Arc::new(LargeListArray::new( + inner_field.clone(), + list_arr.offsets().clone(), + projected_values, + list_arr.nulls().cloned(), + ))) + } + DataType::FixedSizeList(inner_field, size) => { + let list_arr = array.as_fixed_size_list(); + let projected_values = project_array(list_arr.values(), inner_field.as_ref())?; + Ok(Arc::new(FixedSizeListArray::new( + inner_field.clone(), + *size, + projected_values, + list_arr.nulls().cloned(), + ))) + } + _ => Ok(array.clone()), + } +} + fn project(struct_array: &StructArray, fields: &Fields) -> Result { if fields.is_empty() { return Ok(StructArray::new_empty_fields( @@ -805,16 +848,8 @@ fn project(struct_array: &StructArray, fields: &Fields) -> Result { let mut columns: Vec = vec![]; for field in fields.iter() { if let Some(col) = struct_array.column_by_name(field.name()) { - match field.data_type() { - // TODO handle list-of-struct - DataType::Struct(subfields) => { - let projected = project(col.as_struct(), subfields)?; - columns.push(Arc::new(projected)); - } - _ => { - columns.push(col.clone()); - } - } + let projected = project_array(col, field.as_ref())?; + columns.push(projected); } else { return Err(ArrowError::SchemaError(format!( "field {} does not exist in the RecordBatch", @@ -2244,4 
+2279,245 @@ mod tests { let merged_array = merge_with_schema(&left_list_struct, &right_list_struct, &target_fields); assert_eq!(merged_array.len(), 2); } + + #[test] + fn test_project_by_schema_list_struct_reorder() { + // Test that project_by_schema correctly reorders fields inside List + // This is a regression test for issue #5702 + + // Source schema with inner struct fields in order: c, b, a + let source_inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("c", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + Field::new("a", DataType::Utf8, true), + ])); + let source_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new( + "data", + DataType::List(Arc::new(Field::new( + "item", + source_inner_struct.clone(), + true, + ))), + true, + ), + ])); + + // Create source data with c, b, a order + let c_array = StringArray::from(vec!["c1", "c2"]); + let b_array = StringArray::from(vec!["b1", "b2"]); + let a_array = StringArray::from(vec!["a1", "a2"]); + let inner_struct = StructArray::from(vec![ + ( + Arc::new(Field::new("c", DataType::Utf8, true)), + Arc::new(c_array) as ArrayRef, + ), + ( + Arc::new(Field::new("b", DataType::Utf8, true)), + Arc::new(b_array) as ArrayRef, + ), + ( + Arc::new(Field::new("a", DataType::Utf8, true)), + Arc::new(a_array) as ArrayRef, + ), + ]); + + let list_array = ListArray::new( + Arc::new(Field::new("item", source_inner_struct, true)), + OffsetBuffer::from_lengths([1, 1]), + Arc::new(inner_struct), + None, + ); + + let batch = RecordBatch::try_new( + source_schema, + vec![Arc::new(Int32Array::from(vec![1, 2])), Arc::new(list_array)], + ) + .unwrap(); + + // Target schema with inner struct fields in order: a, b, c + let target_inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + Field::new("c", DataType::Utf8, true), + ])); + let target_schema = Schema::new(vec![ + Field::new("id", 
DataType::Int32, false), + Field::new( + "data", + DataType::List(Arc::new(Field::new("item", target_inner_struct, true))), + true, + ), + ]); + + // Project should reorder the inner struct fields + let projected = batch.project_by_schema(&target_schema).unwrap(); + + // Verify the schema is correct + assert_eq!(projected.schema().as_ref(), &target_schema); + + // Verify the data is correct by checking inner struct field order + let projected_list = projected.column(1).as_list::(); + let projected_struct = projected_list.values().as_struct(); + + // Fields should now be in order: a, b, c + assert_eq!( + projected_struct.column_by_name("a").unwrap().as_ref(), + &StringArray::from(vec!["a1", "a2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column_by_name("b").unwrap().as_ref(), + &StringArray::from(vec!["b1", "b2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column_by_name("c").unwrap().as_ref(), + &StringArray::from(vec!["c1", "c2"]) as &dyn Array + ); + + // Also verify positional access matches expected order (a=0, b=1, c=2) + assert_eq!( + projected_struct.column(0).as_ref(), + &StringArray::from(vec!["a1", "a2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column(1).as_ref(), + &StringArray::from(vec!["b1", "b2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column(2).as_ref(), + &StringArray::from(vec!["c1", "c2"]) as &dyn Array + ); + } + + #[test] + fn test_project_by_schema_nested_list_struct() { + // Test deeply nested List>> projection + let inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("y", DataType::Int32, true), + Field::new("x", DataType::Int32, true), + ])); + let source_schema = Arc::new(Schema::new(vec![Field::new( + "outer", + DataType::List(Arc::new(Field::new( + "item", + DataType::Struct(Fields::from(vec![ + Field::new("b", DataType::Utf8, true), + Field::new( + "inner_list", + DataType::List(Arc::new(Field::new("item", inner_struct.clone(), true))), + true, + ), + Field::new("a", 
DataType::Utf8, true), + ])), + true, + ))), + true, + )])); + + // Create deeply nested data + let y_array = Int32Array::from(vec![1, 2]); + let x_array = Int32Array::from(vec![3, 4]); + let innermost_struct = StructArray::from(vec![ + ( + Arc::new(Field::new("y", DataType::Int32, true)), + Arc::new(y_array) as ArrayRef, + ), + ( + Arc::new(Field::new("x", DataType::Int32, true)), + Arc::new(x_array) as ArrayRef, + ), + ]); + let inner_list = ListArray::new( + Arc::new(Field::new("item", inner_struct.clone(), true)), + OffsetBuffer::from_lengths([2]), + Arc::new(innermost_struct), + None, + ); + + let b_array = StringArray::from(vec!["b1"]); + let a_array = StringArray::from(vec!["a1"]); + let middle_struct = StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Utf8, true)), + Arc::new(b_array) as ArrayRef, + ), + ( + Arc::new(Field::new( + "inner_list", + DataType::List(Arc::new(Field::new("item", inner_struct, true))), + true, + )), + Arc::new(inner_list) as ArrayRef, + ), + ( + Arc::new(Field::new("a", DataType::Utf8, true)), + Arc::new(a_array) as ArrayRef, + ), + ]); + + let outer_list = ListArray::new( + Arc::new(Field::new("item", middle_struct.data_type().clone(), true)), + OffsetBuffer::from_lengths([1]), + Arc::new(middle_struct), + None, + ); + + let batch = + RecordBatch::try_new(source_schema, vec![Arc::new(outer_list) as ArrayRef]).unwrap(); + + // Target schema with reordered fields at all levels + let target_inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("x", DataType::Int32, true), // x before y now + Field::new("y", DataType::Int32, true), + ])); + let target_schema = Schema::new(vec![Field::new( + "outer", + DataType::List(Arc::new(Field::new( + "item", + DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Utf8, true), // a before b now + Field::new( + "inner_list", + DataType::List(Arc::new(Field::new("item", target_inner_struct, true))), + true, + ), + Field::new("b", DataType::Utf8, true), + ])), + 
true, + ))), + true, + )]); + + let projected = batch.project_by_schema(&target_schema).unwrap(); + + // Verify schema + assert_eq!(projected.schema().as_ref(), &target_schema); + + // Verify deeply nested data is reordered correctly + let outer_list = projected.column(0).as_list::(); + let middle_struct = outer_list.values().as_struct(); + + // Middle struct should have a first, then inner_list, then b + assert_eq!( + middle_struct.column(0).as_ref(), + &StringArray::from(vec!["a1"]) as &dyn Array + ); + assert_eq!( + middle_struct.column(2).as_ref(), + &StringArray::from(vec!["b1"]) as &dyn Array + ); + + // Inner list's struct should have x first, then y + let inner_list = middle_struct.column(1).as_list::(); + let innermost_struct = inner_list.values().as_struct(); + assert_eq!( + innermost_struct.column(0).as_ref(), + &Int32Array::from(vec![3, 4]) as &dyn Array + ); + assert_eq!( + innermost_struct.column(1).as_ref(), + &Int32Array::from(vec![1, 2]) as &dyn Array + ); + } } From 40674572b05e3ddd76c84bdc55ce4e65a4cfcf4f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 12 Jan 2026 14:45:12 -0800 Subject: [PATCH 2/4] test: add integration test for List field reordering (issue #5702) Adds test data and integration test that reproduces the original bug: - Fragment 0: List<Struct<a, b, c>> with all fields + "extra" column - Fragment 1: List<Struct<c, b>> with reordered/missing inner struct fields This combination of out-of-order field storage + schema evolution inside the List triggers project_by_schema to reorder fields. Before the fix, this would fail with: "Incorrect datatype for StructArray field expected List(Struct(...)) got List(Struct(...))" Also adds a direct unit test in dataset_schema_evolution.rs that tests the project_by_schema function with misordered List fields.
Fixes #5702 Co-Authored-By: Claude Opus 4.5 --- .../src/dataset/tests/dataset_migrations.rs | 34 ++++++ .../dataset/tests/dataset_schema_evolution.rs | 107 ++++++++++++++++- .../list_struct_field_reorder/datagen.py | 108 ++++++++++++++++++ ...0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn | Bin 0 -> 314 bytes ...1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn | Bin 0 -> 126 bytes .../_versions/1.manifest | Bin 0 -> 713 bytes .../_versions/2.manifest | Bin 0 -> 610 bytes ...0001101010e76d574ef8911733dd148c875c.lance | Bin 0 -> 1805 bytes ...111001011144d87442baad032b53e7f244a7.lance | Bin 0 -> 1212 bytes 9 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 test_data/list_struct_field_reorder/datagen.py create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/1.manifest create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/2.manifest create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance create mode 100644 test_data/list_struct_field_reorder/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance diff --git a/rust/lance/src/dataset/tests/dataset_migrations.rs b/rust/lance/src/dataset/tests/dataset_migrations.rs index abccd20edd3..894f52a22f8 100644 --- a/rust/lance/src/dataset/tests/dataset_migrations.rs +++ b/rust/lance/src/dataset/tests/dataset_migrations.rs @@ -375,3 +375,37 @@ async fn test_max_fragment_id_migration() { assert_eq!(dataset.manifest.max_fragment_id(), Some(2)); } } + +/// Regression test for issue #5702: project_by_schema should reorder fields inside List. 
+/// +/// This test reads a dataset with: +/// - Fragment 0: List<Struct<a, b, c>> with all fields + "extra" column +/// - Fragment 1: List<Struct<c, b>> with reordered/missing inner struct fields +/// +/// Before the fix, reading would fail with: +/// "Incorrect datatype for StructArray field expected List(Struct(...)) got List(Struct(...))" +#[tokio::test] +async fn test_list_struct_field_reorder_issue_5702() { + let test_dir = copy_test_data_to_tmp("list_struct_field_reorder/list_struct_reorder.lance") + .expect("Failed to copy test data"); + let test_uri = test_dir.path_str(); + + let dataset = Dataset::open(&test_uri) + .await + .expect("Failed to open dataset"); + + // Verify we have 2 fragments + assert_eq!(dataset.get_fragments().len(), 2); + + // This read would fail before the fix for #5702 + let batches = scan_dataset(&test_uri) + .await + .expect("Failed to scan dataset"); + let batch = concat_batches(&batches[0].schema(), batches.iter()).expect("Failed to concat"); + + // Verify we got all 4 rows + assert_eq!(batch.num_rows(), 4); + + // Verify schema has expected columns + assert_eq!(batch.schema().fields().len(), 3); // id, data, extra +} diff --git a/rust/lance/src/dataset/tests/dataset_schema_evolution.rs b/rust/lance/src/dataset/tests/dataset_schema_evolution.rs index fd988978991..2c4a2f82da0 100644 --- a/rust/lance/src/dataset/tests/dataset_schema_evolution.rs +++ b/rust/lance/src/dataset/tests/dataset_schema_evolution.rs @@ -4,12 +4,14 @@ use crate::dataset::{NewColumnTransform, WriteMode, WriteParams}; use crate::Dataset; use arrow_array::{ - Array, ArrayRef, FixedSizeListArray, Int32Array, ListArray, RecordBatch, RecordBatchIterator, - StringArray, StructArray, + cast::AsArray, Array, ArrayRef, FixedSizeListArray, Int32Array, ListArray, RecordBatch, + RecordBatchIterator, StringArray, StructArray, }; +use arrow_buffer::OffsetBuffer; use arrow_schema::{ DataType, Field as ArrowField, Field, Fields as ArrowFields, Fields, Schema as ArrowSchema, }; +use
lance_arrow::RecordBatchExt; use lance_encoding::version::LanceFileVersion; use rstest::rstest; use std::collections::HashMap; @@ -546,3 +548,104 @@ async fn prepare_initial_dataset_with_list_struct_col(version: LanceFileVersion) dataset } + +/// Regression test for issue #5702: project_by_schema should reorder fields inside List. +/// +/// This test simulates the scenario where a fragment's data file has fields stored in a +/// different order than the schema expects. When reading such fragments, project_by_schema +/// is called to reorder the columns, and it must handle nested List types correctly. +#[test] +fn test_project_by_schema_list_struct_field_reorder_regression() { + // Create a RecordBatch with List where inner struct fields are in "wrong" order (c, b, a) + // This simulates reading from a fragment where DataFile.fields has non-sequential field IDs + let source_inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("c", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + Field::new("a", DataType::Utf8, true), + ])); + let source_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new( + "data", + DataType::List(Arc::new(Field::new( + "item", + source_inner_struct.clone(), + true, + ))), + true, + ), + ])); + + // Create source data with fields in c, b, a order + let c_array = StringArray::from(vec!["c1", "c2"]); + let b_array = StringArray::from(vec!["b1", "b2"]); + let a_array = StringArray::from(vec!["a1", "a2"]); + let inner_struct = StructArray::from(vec![ + ( + Arc::new(Field::new("c", DataType::Utf8, true)), + Arc::new(c_array) as ArrayRef, + ), + ( + Arc::new(Field::new("b", DataType::Utf8, true)), + Arc::new(b_array) as ArrayRef, + ), + ( + Arc::new(Field::new("a", DataType::Utf8, true)), + Arc::new(a_array) as ArrayRef, + ), + ]); + + let list_array = ListArray::new( + Arc::new(Field::new("item", source_inner_struct, true)), + OffsetBuffer::from_lengths([1, 1]), + 
Arc::new(inner_struct), + None, + ); + + let batch = RecordBatch::try_new( + source_schema, + vec![Arc::new(Int32Array::from(vec![1, 2])), Arc::new(list_array)], + ) + .unwrap(); + + // Target schema expects inner struct fields in "correct" order (a, b, c) + let target_inner_struct = DataType::Struct(Fields::from(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Utf8, true), + Field::new("c", DataType::Utf8, true), + ])); + let target_schema = ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new( + "data", + DataType::List(Arc::new(Field::new("item", target_inner_struct, true))), + true, + ), + ]); + + // This is the same project_by_schema call that happens in fragment.rs:2566 + // Before the fix for #5702, this would fail with: + // "Incorrect datatype for StructArray field \"data\", expected List(Struct(\"a\": Utf8, ...)) got List(Struct(\"c\": Utf8, ...))" + let projected = batch.project_by_schema(&target_schema).unwrap(); + + // Verify the schema is correct + assert_eq!(projected.schema().as_ref(), &target_schema); + + // Verify the data is correctly reordered + let projected_list = projected.column(1).as_list::(); + let projected_struct = projected_list.values().as_struct(); + + // Fields should now be in order: a, b, c (by position) + assert_eq!( + projected_struct.column(0).as_ref(), + &StringArray::from(vec!["a1", "a2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column(1).as_ref(), + &StringArray::from(vec!["b1", "b2"]) as &dyn Array + ); + assert_eq!( + projected_struct.column(2).as_ref(), + &StringArray::from(vec!["c1", "c2"]) as &dyn Array + ); +} diff --git a/test_data/list_struct_field_reorder/datagen.py b/test_data/list_struct_field_reorder/datagen.py new file mode 100644 index 00000000000..feeb3b76502 --- /dev/null +++ b/test_data/list_struct_field_reorder/datagen.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +Generate test data for issue #5702: project_by_schema should reorder fields 
inside List. + +This script creates a dataset where: +1. Fragment 0 has List<Struct<a, b, c>> with all fields + an extra top-level column +2. Fragment 1 has List<Struct> with: + - Inner struct fields in different order (c, b) + - Missing inner struct field "a" + - Missing top-level column "extra" + +The combination of out-of-order field storage + schema evolution inside the List +triggers the bug where project_by_schema fails to reorder fields. + +Before the fix, reading would fail with: +"Incorrect datatype for StructArray field expected List(Struct(...)) got List(Struct(...))" + +Usage: + pip install pylance==2.0.0-beta.8 + python datagen.py +""" + +import lance +import pyarrow as pa + +# Assert the version to document which version was used to create the test data +assert lance.__version__ == "2.0.0-beta.8", ( + f"Expected pylance 2.0.0-beta.8, got {lance.__version__}" +) +print(f"Lance version: {lance.__version__}") + +# Schema with List<Struct<a, b, c>> and an extra column +inner_struct_type = pa.struct( + [ + pa.field("a", pa.utf8()), + pa.field("b", pa.utf8()), + pa.field("c", pa.utf8()), + ] +) +schema = pa.schema( + [ + pa.field("id", pa.int32()), + pa.field("data", pa.list_(pa.field("item", inner_struct_type))), + pa.field("extra", pa.utf8()), # This column will be missing in fragment 1 + ] +) + +# Fragment 0: data with fields in schema order (a, b, c) + extra column +fragment0_data = pa.table( + { + "id": pa.array([1, 2], type=pa.int32()), + "data": pa.array( + [ + [{"a": "a1", "b": "b1", "c": "c1"}], + [{"a": "a2", "b": "b2", "c": "c2"}], + ], + type=pa.list_(pa.field("item", inner_struct_type)), + ), + "extra": pa.array(["extra1", "extra2"], type=pa.utf8()), + }, + schema=schema, +) + +# Create dataset with first fragment +dataset_path = "list_struct_reorder.lance" +lance.write_dataset(fragment0_data, dataset_path, mode="create") +print("Created dataset with fragment 0") + +# Fragment 1: data with inner struct fields reordered AND missing field "a" +inner_struct_type_reordered = pa.struct( +
[ + pa.field("c", pa.utf8()), + pa.field("b", pa.utf8()), + # Note: field "a" is intentionally missing from the inner struct + ] +) +schema_reordered = pa.schema( + [ + pa.field("id", pa.int32()), + pa.field("data", pa.list_(pa.field("item", inner_struct_type_reordered))), + # Note: "extra" column is also missing + ] +) + +fragment1_data = pa.table( + { + "id": pa.array([3, 4], type=pa.int32()), + "data": pa.array( + [ + [{"c": "c3", "b": "b3"}], # Missing "a" field + [{"c": "c4", "b": "b4"}], + ], + type=pa.list_(pa.field("item", inner_struct_type_reordered)), + ), + }, + schema=schema_reordered, +) + +# Append second fragment with reordered and missing inner struct fields +lance.write_dataset(fragment1_data, dataset_path, mode="append") +print("Appended fragment 1 with reordered inner struct") + +# Verify the test data was created correctly +ds = lance.dataset(dataset_path) +print(f"\nDataset created with {len(ds.get_fragments())} fragments") +for i, frag in enumerate(ds.get_fragments()): + for df in frag.metadata.data_files(): + print(f"Fragment {i}: fields={df.fields}") diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn b/test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn new file mode 100644 index 0000000000000000000000000000000000000000..2a432f41e0a2e6fcd93e0732f3ee1d7c67a6beae GIT binary patch literal 314 zcmZXNL2AP=5JfA+cEN!xrd^1vw8+ZfAl0%ZA0vw%K$gZqka&?OkV_PLmR_*6O%0*_ zvzX7D&mY*;C}h)ivTTKJO66T$8s)I;9ZKxAR;}&6vQL`7!yz|9NFhf}d)8>DLEZRT zE9^}xrLHRPr83Uw+U-wva0t&&gsze_+b)Jv@v-3)wm^gD--}3*h9OpOf*1ljNW6`f zk;nV`6s~c9jhCB?bLth65b?|?Cq=e!cmZPBiqjc|;W$R=1Ur`ehmNIxAcqw1@nTu_ K_5Jq8%;3#EED+=2QA4)7DQ6zM_p;O_Gq`hl$9RzU|!9j4%8S?n#@!pr0*9Vq`Td}6Bm!vF6DDMSPrs6ox1Z zhjhB_L)^qL&-~9?pzY>`>2@d(0JpG{Nh1kZeBT=8V-t?e=vtXs)*g0pqpt|`LEEc2 z-o-!?+lMoh7Z+tJtYAuy~sXdNG(9DM-*9K)Xg literal 0 HcmV?d00001 diff --git 
a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/2.manifest b/test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/2.manifest new file mode 100644 index 0000000000000000000000000000000000000000..9b20f0c09de3b4f2c01ca282ad57fd3538d3e101 GIT binary patch literal 610 zcmb`FKTE?v6u@(rKh{`8PLYr)L#IN*-R0U`-NaReQUuq#B$Y_b(4J5n9GrFZ3kZTA z!f&A8!okhST`#eb(#gR$+t_{Oz0o2L3l$1&uazaVSNMD9>Jm8^_1QUtl zK2GF=GjfOM$mJLlLa;t{u1a0knX!x~j8Q2>hGQywah{ARVufPD=`CLtf=Pb{z+An&c6Ev)S{A$ozDm|4D zoG1OPm#}=y2ZSVXoMnXZlqbD(;bW~9VLN_PXeW1@0YoFclm8a&!M%3t?c@H%*}8rI xd^Asi?2_;=i;vX2Jl}Qg#S&?RBkVgeS0dt9?S`A0KLO)L|EQmU#^L_)!8aelmc;-7 literal 0 HcmV?d00001 diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance b/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance new file mode 100644 index 0000000000000000000000000000000000000000..c5b72a92b5a012be05f15e769b94a6dd558ff111 GIT binary patch literal 1805 zcmc(g-%G+!6vub(?rfY4J>Z2v2C3ULJ|iP}&Gr1FQh}5S)3jd?ND0IueVk1`S=VcPCP5;B0U<>$JHxSLyUD$@L!B#xuA3w;RjYlC8hFMlN|`u9_~PHE{f)3 zO>zTlNJa!{iV4ISR-rN%u~EwWo(9DdbsecrxE5!P0i2HL^5E)7T^^i`n1Cn$W$kMf zb$nf1d1RiY?n=$&^}m(6EA^AqA5;2X8waBJ`;;EatNtqMBfcZA-JARmK3*$fj;dVM GuIpcLM5tH* literal 0 HcmV?d00001 diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance b/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance new file mode 100644 index 0000000000000000000000000000000000000000..3e98d0211818d2e254db582cb574730386d957ad GIT binary patch literal 1212 zcmb`H&r1S96vyYy?%ML|u)I7h619l^lGsEgL?Sc`Iz)8sxULclx|*(s4uM3_pVZZV zrN5+Rb+)t)GTDP=_WS02=QD3$Mq`ZW;=B4|fbhn_XM^%Oi7(zm9ID&!OMY3^Ghe?d zzgcRo#K&jA4Dq1ld?W#RBM75Xu>cjo9O*4D@@$x&U+)CN$Q?$5ksnQ~(xeAbyK8d? 
z@~I?`n*_dXIDDE4!h2E5<7Cs;7n2spL)yn(+&S)eq2G4fq2Fr>$Ixx`I-_oQDK&9q zbc(dE`F%{4SLXE%@pej}HWGN_}CH#ifh*_Gg z)Y*|h9nc=;mZErGsgqmO$1nlnmM#$IRKv5Sh^=B~u^SSHXqv=Lunm=CjNyF3{|~+< yFHJDJvaB!lgVgU*-$`B7W Date: Mon, 12 Jan 2026 14:54:44 -0800 Subject: [PATCH 3/4] refactor: address PR feedback for test data - Move test data to version-specific directory (v1.0.1) - Replace prints with assertions in datagen.py - Add assertion that scanning fails with issue #5702 error - Remove redundant test from dataset_schema_evolution.rs Co-Authored-By: Claude Opus 4.5 --- .../src/dataset/tests/dataset_migrations.rs | 2 +- .../dataset/tests/dataset_schema_evolution.rs | 107 +----------------- .../datagen.py | 38 +++++-- ...0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn | Bin ...1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn | Bin .../_versions/1.manifest | Bin .../_versions/2.manifest | Bin ...0001101010e76d574ef8911733dd148c875c.lance | Bin ...111001011144d87442baad032b53e7f244a7.lance | Bin 9 files changed, 29 insertions(+), 118 deletions(-) rename test_data/{list_struct_field_reorder => v1.0.1}/datagen.py (70%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn (100%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn (100%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/_versions/1.manifest (100%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/_versions/2.manifest (100%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance (100%) rename test_data/{list_struct_field_reorder => v1.0.1}/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance (100%) diff --git a/rust/lance/src/dataset/tests/dataset_migrations.rs 
b/rust/lance/src/dataset/tests/dataset_migrations.rs index 894f52a22f8..0f02425b0dd 100644 --- a/rust/lance/src/dataset/tests/dataset_migrations.rs +++ b/rust/lance/src/dataset/tests/dataset_migrations.rs @@ -386,7 +386,7 @@ async fn test_max_fragment_id_migration() { /// "Incorrect datatype for StructArray field expected List(Struct(...)) got List(Struct(...))" #[tokio::test] async fn test_list_struct_field_reorder_issue_5702() { - let test_dir = copy_test_data_to_tmp("list_struct_field_reorder/list_struct_reorder.lance") + let test_dir = copy_test_data_to_tmp("v1.0.1/list_struct_reorder.lance") .expect("Failed to copy test data"); let test_uri = test_dir.path_str(); diff --git a/rust/lance/src/dataset/tests/dataset_schema_evolution.rs b/rust/lance/src/dataset/tests/dataset_schema_evolution.rs index 2c4a2f82da0..fd988978991 100644 --- a/rust/lance/src/dataset/tests/dataset_schema_evolution.rs +++ b/rust/lance/src/dataset/tests/dataset_schema_evolution.rs @@ -4,14 +4,12 @@ use crate::dataset::{NewColumnTransform, WriteMode, WriteParams}; use crate::Dataset; use arrow_array::{ - cast::AsArray, Array, ArrayRef, FixedSizeListArray, Int32Array, ListArray, RecordBatch, - RecordBatchIterator, StringArray, StructArray, + Array, ArrayRef, FixedSizeListArray, Int32Array, ListArray, RecordBatch, RecordBatchIterator, + StringArray, StructArray, }; -use arrow_buffer::OffsetBuffer; use arrow_schema::{ DataType, Field as ArrowField, Field, Fields as ArrowFields, Fields, Schema as ArrowSchema, }; -use lance_arrow::RecordBatchExt; use lance_encoding::version::LanceFileVersion; use rstest::rstest; use std::collections::HashMap; @@ -548,104 +546,3 @@ async fn prepare_initial_dataset_with_list_struct_col(version: LanceFileVersion) dataset } - -/// Regression test for issue #5702: project_by_schema should reorder fields inside List. -/// -/// This test simulates the scenario where a fragment's data file has fields stored in a -/// different order than the schema expects. 
When reading such fragments, project_by_schema -/// is called to reorder the columns, and it must handle nested List types correctly. -#[test] -fn test_project_by_schema_list_struct_field_reorder_regression() { - // Create a RecordBatch with List where inner struct fields are in "wrong" order (c, b, a) - // This simulates reading from a fragment where DataFile.fields has non-sequential field IDs - let source_inner_struct = DataType::Struct(Fields::from(vec![ - Field::new("c", DataType::Utf8, true), - Field::new("b", DataType::Utf8, true), - Field::new("a", DataType::Utf8, true), - ])); - let source_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new( - "data", - DataType::List(Arc::new(Field::new( - "item", - source_inner_struct.clone(), - true, - ))), - true, - ), - ])); - - // Create source data with fields in c, b, a order - let c_array = StringArray::from(vec!["c1", "c2"]); - let b_array = StringArray::from(vec!["b1", "b2"]); - let a_array = StringArray::from(vec!["a1", "a2"]); - let inner_struct = StructArray::from(vec![ - ( - Arc::new(Field::new("c", DataType::Utf8, true)), - Arc::new(c_array) as ArrayRef, - ), - ( - Arc::new(Field::new("b", DataType::Utf8, true)), - Arc::new(b_array) as ArrayRef, - ), - ( - Arc::new(Field::new("a", DataType::Utf8, true)), - Arc::new(a_array) as ArrayRef, - ), - ]); - - let list_array = ListArray::new( - Arc::new(Field::new("item", source_inner_struct, true)), - OffsetBuffer::from_lengths([1, 1]), - Arc::new(inner_struct), - None, - ); - - let batch = RecordBatch::try_new( - source_schema, - vec![Arc::new(Int32Array::from(vec![1, 2])), Arc::new(list_array)], - ) - .unwrap(); - - // Target schema expects inner struct fields in "correct" order (a, b, c) - let target_inner_struct = DataType::Struct(Fields::from(vec![ - Field::new("a", DataType::Utf8, true), - Field::new("b", DataType::Utf8, true), - Field::new("c", DataType::Utf8, true), - ])); - let target_schema = 
ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new( - "data", - DataType::List(Arc::new(Field::new("item", target_inner_struct, true))), - true, - ), - ]); - - // This is the same project_by_schema call that happens in fragment.rs:2566 - // Before the fix for #5702, this would fail with: - // "Incorrect datatype for StructArray field \"data\", expected List(Struct(\"a\": Utf8, ...)) got List(Struct(\"c\": Utf8, ...))" - let projected = batch.project_by_schema(&target_schema).unwrap(); - - // Verify the schema is correct - assert_eq!(projected.schema().as_ref(), &target_schema); - - // Verify the data is correctly reordered - let projected_list = projected.column(1).as_list::(); - let projected_struct = projected_list.values().as_struct(); - - // Fields should now be in order: a, b, c (by position) - assert_eq!( - projected_struct.column(0).as_ref(), - &StringArray::from(vec!["a1", "a2"]) as &dyn Array - ); - assert_eq!( - projected_struct.column(1).as_ref(), - &StringArray::from(vec!["b1", "b2"]) as &dyn Array - ); - assert_eq!( - projected_struct.column(2).as_ref(), - &StringArray::from(vec!["c1", "c2"]) as &dyn Array - ); -} diff --git a/test_data/list_struct_field_reorder/datagen.py b/test_data/v1.0.1/datagen.py similarity index 70% rename from test_data/list_struct_field_reorder/datagen.py rename to test_data/v1.0.1/datagen.py index feeb3b76502..dbe7f586c52 100644 --- a/test_data/list_struct_field_reorder/datagen.py +++ b/test_data/v1.0.1/datagen.py @@ -16,7 +16,7 @@ "Incorrect datatype for StructArray field expected List(Struct(...)) got List(Struct(...))" Usage: - pip install pylance==2.0.0-beta.8 + pip install pylance==1.0.1 python datagen.py """ @@ -24,10 +24,7 @@ import pyarrow as pa # Assert the version to document which version was used to create the test data -assert lance.__version__ == "2.0.0-beta.8", ( - f"Expected pylance 2.0.0-beta.8, got {lance.__version__}" -) -print(f"Lance version: {lance.__version__}") +assert 
lance.__version__ == "1.0.1", f"Expected pylance 1.0.1, got {lance.__version__}" # Schema with List> and an extra column inner_struct_type = pa.struct( @@ -64,7 +61,6 @@ # Create dataset with first fragment dataset_path = "list_struct_reorder.lance" lance.write_dataset(fragment0_data, dataset_path, mode="create") -print("Created dataset with fragment 0") # Fragment 1: data with inner struct fields reordered AND missing field "a" inner_struct_type_reordered = pa.struct( @@ -98,11 +94,29 @@ # Append second fragment with reordered and missing inner struct fields lance.write_dataset(fragment1_data, dataset_path, mode="append") -print("Appended fragment 1 with reordered inner struct") -# Verify the test data was created correctly +# Verify the test data structure ds = lance.dataset(dataset_path) -print(f"\nDataset created with {len(ds.get_fragments())} fragments") -for i, frag in enumerate(ds.get_fragments()): - for df in frag.metadata.data_files(): - print(f"Fragment {i}: fields={df.fields}") +assert len(ds.get_fragments()) == 2, "Expected 2 fragments" + +frag0_fields = ds.get_fragments()[0].metadata.data_files()[0].fields +frag1_fields = ds.get_fragments()[1].metadata.data_files()[0].fields + +# Fragment 0 should have sequential field IDs: [0, 1, 2, 3, 4, 5, 6] +# (id=0, data=1, item=2, a=3, b=4, c=5, extra=6) +assert frag0_fields == [0, 1, 2, 3, 4, 5, 6], f"Fragment 0 fields: {frag0_fields}" + +# Fragment 1 should have reordered field IDs: [0, 1, 2, 5, 4] +# (id=0, data=1, item=2, c=5, b=4) - note: a=3 and extra=6 are missing +assert frag1_fields == [0, 1, 2, 5, 4], f"Fragment 1 fields: {frag1_fields}" + +# Verify that scanning fails with the expected error (issue #5702) +try: + ds.to_table() + raise AssertionError("Expected scan to fail with issue #5702 error") +except Exception as e: + error_msg = str(e) + assert "Incorrect datatype for StructArray" in error_msg, f"Unexpected error: {e}" + assert "List(Struct" in error_msg, f"Unexpected error: {e}" + +print("Test 
data created successfully and verified issue #5702 is triggered") diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn b/test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn rename to test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn b/test_data/v1.0.1/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn rename to test_data/v1.0.1/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/1.manifest b/test_data/v1.0.1/list_struct_reorder.lance/_versions/1.manifest similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/1.manifest rename to test_data/v1.0.1/list_struct_reorder.lance/_versions/1.manifest diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/2.manifest b/test_data/v1.0.1/list_struct_reorder.lance/_versions/2.manifest similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/_versions/2.manifest rename to test_data/v1.0.1/list_struct_reorder.lance/_versions/2.manifest diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance 
b/test_data/v1.0.1/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance rename to test_data/v1.0.1/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance diff --git a/test_data/list_struct_field_reorder/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance b/test_data/v1.0.1/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance similarity index 100% rename from test_data/list_struct_field_reorder/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance rename to test_data/v1.0.1/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance From a9818cc913391c5f1ae1de8c282afa4871536134 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 12 Jan 2026 14:58:42 -0800 Subject: [PATCH 4/4] fix: regenerate test data with pylance 1.0.1 Regenerated the test data using pylance 1.0.1 as intended. Also fixed assertion to match actual error message format. Co-Authored-By: Claude Opus 4.5 --- test_data/v1.0.1/datagen.py | 2 +- ... 
0-cbdb49e0-e048-4062-8a1a-b56b9258a3e7.txn} | Bin 314 -> 314 bytes .../1-87766aea-beb2-4942-8830-df51d2f17492.txn | Bin 0 -> 126 bytes .../1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn | Bin 126 -> 0 bytes .../_versions/1.manifest | Bin 713 -> 704 bytes .../_versions/2.manifest | Bin 610 -> 601 bytes ...01111111111861ef14d8abd303df7f4d9b261.lance} | Bin ...101100101002bf4794c4781d65d4cc3d6e658.lance} | Bin 8 files changed, 1 insertion(+), 1 deletion(-) rename test_data/v1.0.1/list_struct_reorder.lance/_transactions/{0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn => 0-cbdb49e0-e048-4062-8a1a-b56b9258a3e7.txn} (65%) create mode 100644 test_data/v1.0.1/list_struct_reorder.lance/_transactions/1-87766aea-beb2-4942-8830-df51d2f17492.txn delete mode 100644 test_data/v1.0.1/list_struct_reorder.lance/_transactions/1-d8bb22b2-8121-416b-bf98-ab101b03607b.txn rename test_data/v1.0.1/list_struct_reorder.lance/data/{00111000101101111001011144d87442baad032b53e7f244a7.lance => 010000111100101111111111861ef14d8abd303df7f4d9b261.lance} (100%) rename test_data/v1.0.1/list_struct_reorder.lance/data/{001011110010000001101010e76d574ef8911733dd148c875c.lance => 0101110001001101100101002bf4794c4781d65d4cc3d6e658.lance} (100%) diff --git a/test_data/v1.0.1/datagen.py b/test_data/v1.0.1/datagen.py index dbe7f586c52..4dc61a66559 100644 --- a/test_data/v1.0.1/datagen.py +++ b/test_data/v1.0.1/datagen.py @@ -117,6 +117,6 @@ except Exception as e: error_msg = str(e) assert "Incorrect datatype for StructArray" in error_msg, f"Unexpected error: {e}" - assert "List(Struct" in error_msg, f"Unexpected error: {e}" + assert "List(Field" in error_msg, f"Unexpected error: {e}" print("Test data created successfully and verified issue #5702 is triggered") diff --git a/test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn b/test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-cbdb49e0-e048-4062-8a1a-b56b9258a3e7.txn similarity index 65% rename from 
test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-5e0169b1-9076-4dc5-84ce-fce1ef7749ab.txn rename to test_data/v1.0.1/list_struct_reorder.lance/_transactions/0-cbdb49e0-e048-4062-8a1a-b56b9258a3e7.txn index 2a432f41e0a2e6fcd93e0732f3ee1d7c67a6beae..7d22a5037d7235d2f26c831a7ca984167b65df40 100644 GIT binary patch delta 135 zcmdnRw2P@;NF_NbCCS7x)j&7Zz{En=#K6o**CNp{Q8&rdEXmTy)FRP1)qE4%1SYOf zp+GJR149EtLqh`tAPr=I5J<$pz$hur#N5&(*~Hw!FvZL?#Uwe|IK?d0%+x|JCowNM ZRfwH|k%^gwl}!r9P+&6X<(pW)8vxcE9?t*( delta 135 zcmdnRw2P@;NX0bOz|hPx$xzqQz}!sNBqiBY*TN(@RW~g;)i5>9+}y-6F=-Rq1SYOf zp+GJR0|P??Lm&XrU;t!-V5+%UimACtYMOsJTy51m z%H$n$04I7w@rsGE{~Vt4!`^2GgbsGP|J@#uBQ`}6E>!{q(WE4~`iR$8d9Te*yTMH3 NdCPuW4175Jq8%;3#EED+=2y}pIV71mF-_iAg3*eb9+}y-6F=-Rq z1SYOfp+GJR0|P??Lm&XrU;t!-V5+%UimACtYMOjFQq!%q>llP0TF}Q_M_LOp=q0Q_ND$Of4ptGirc! zfGr}yLOCyPjvGIwo@NtR(cGIG!I-3Fs7s7D^-3!8viP`Ia}x8CQ-xR!^$he3Cp$7J F0sx0;PImwR delta 313 zcmcb~@`xq9j)8%JgHcE&#Ud%m$SBE3*TT@qP}juJEJ-&h&C)_QG0D)tFv-B!%)mTp z5nC`oe1U_GQ- zC>PAb@%Y`;(`*7g&%Z2Xj$lmEGSnr;pL!(~d0ApytT~B!$*DrDMtTN%22yNEsU?Ye J7LzTQ6an&5P@Dh& diff --git a/test_data/v1.0.1/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance b/test_data/v1.0.1/list_struct_reorder.lance/data/010000111100101111111111861ef14d8abd303df7f4d9b261.lance similarity index 100% rename from test_data/v1.0.1/list_struct_reorder.lance/data/00111000101101111001011144d87442baad032b53e7f244a7.lance rename to test_data/v1.0.1/list_struct_reorder.lance/data/010000111100101111111111861ef14d8abd303df7f4d9b261.lance diff --git a/test_data/v1.0.1/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance b/test_data/v1.0.1/list_struct_reorder.lance/data/0101110001001101100101002bf4794c4781d65d4cc3d6e658.lance similarity index 100% rename from test_data/v1.0.1/list_struct_reorder.lance/data/001011110010000001101010e76d574ef8911733dd148c875c.lance rename to 
test_data/v1.0.1/list_struct_reorder.lance/data/0101110001001101100101002bf4794c4781d65d4cc3d6e658.lance