From 569b89eb7b6891f92080973b04edb3d6a79edcb2 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 19 Nov 2025 14:47:35 -0800 Subject: [PATCH 1/3] workaround behavior change --- arrow-schema/src/datatype_parse.rs | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 4ad32f59aac6..726e4d58622e 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -116,7 +116,13 @@ impl<'a> Parser<'a> { /// /// TODO: support metadata: `nullable Int64, metadata: {"foo2": "value"}` fn parse_list_field(&mut self, context: &str) -> ArrowResult { - let nullable = self.parse_opt_nullable(); + // Introduction of the "nullable" token led to a breaking API change. As a temporary + // workaround we'll always treat lists as nullable to match past behavior. + // This should be reverted for 58.0.0. + // + // See https://github.com/apache/arrow-rs/issues/8883 + let _nullable = self.parse_opt_nullable(); + let nullable = true; let data_type = self.parse_next_type()?; // the field name (if exists) must be after a comma @@ -1360,13 +1366,29 @@ mod test { ])), ), (r#"Struct()"#, Struct(Fields::empty())), + ( + "List(Int64)", + List(Arc::new(Field::new_list_field(Int64, true))), + ), + ( + "LargeList(Int64)", + LargeList(Arc::new(Field::new_list_field(Int64, true))), + ), + ( + "ListView(Int64)", + ListView(Arc::new(Field::new_list_field(Int64, true))), + ), + ( + "LargeListView(Int64)", + LargeListView(Arc::new(Field::new_list_field(Int64, true))), + ), ]; for (data_type_string, expected_data_type) in cases { let parsed_data_type = parse_data_type(data_type_string).unwrap(); assert_eq!( parsed_data_type, expected_data_type, - "Parsing '{data_type_string}', expecting '{expected_data_type}'" + "Parsing '{data_type_string}', expecting '{expected_data_type}', got '{parsed_data_type}'" ); } } From 5c273b5428d6eb64f82862c3fd6f396a693ce16c Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 19 Nov 2025 15:04:49 -0800 Subject: [PATCH 2/3] tests should expect nullable fixed size lists fixed in a different PR so don't workaround for them --- arrow-schema/src/datatype_parse.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 726e4d58622e..7a557d02f2b7 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -116,13 +116,15 @@ impl<'a> Parser<'a> { /// /// TODO: support metadata: `nullable Int64, metadata: {"foo2": "value"}` fn parse_list_field(&mut self, context: &str) -> ArrowResult { + let mut nullable = self.parse_opt_nullable(); // Introduction of the "nullable" token led to a breaking API change. As a temporary // workaround we'll always treat lists as nullable to match past behavior. // This should be reverted for 58.0.0. // // See https://github.com/apache/arrow-rs/issues/8883 - let _nullable = self.parse_opt_nullable(); - let nullable = true; + if !context.contains("Fixed") { + nullable = true; + } let data_type = self.parse_next_type()?; // the field name (if exists) must be after a comma @@ -1039,36 +1041,36 @@ mod test { DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])), DataType::Struct(Fields::empty()), DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))), + DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), + DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), - DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::List(Arc::new(Field::new( "nested_list", DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))), + DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))), + DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), - DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::ListView(Arc::new(Field::new( "nested_list_view", DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))), + DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))), + DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), - DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::LargeList(Arc::new(Field::new( "nested_large_list", DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))), + DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))), + DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))), DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))), - DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::LargeListView(Arc::new(Field::new( "nested_large_list_view", DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))), From d524fc61cd0a731b1c5e8881bac3637fb5f833cc Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 19 Nov 2025 15:17:40 -0800 Subject: [PATCH 3/3] better test fix --- arrow-schema/src/datatype_parse.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 7a557d02f2b7..5412551152fc 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -1041,36 +1041,38 @@ mod test { DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])), DataType::Struct(Fields::empty()), DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), + // List(Int64) will still be treated as nullable for now. Uncomment in 58.0.0. + // See https://github.com/apache/arrow-rs/issues/8883 + //DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))), DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::List(Arc::new(Field::new( "nested_list", DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))), DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::ListView(Arc::new(Field::new( "nested_list_view", DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))), DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::LargeList(Arc::new(Field::new( "nested_large_list", DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))), true, ))), DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))), - DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))), DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))), + //DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))), DataType::LargeListView(Arc::new(Field::new( "nested_large_list_view", DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),