2727#include " arrow/io/memory.h"
2828#include " arrow/ipc/api.h"
2929#include " arrow/result.h"
30+ #include " arrow/status.h"
3031#include " arrow/type.h"
3132#include " arrow/util/base64.h"
3233#include " arrow/util/checked_cast.h"
@@ -620,7 +621,8 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
620621 if (group.field_count () != 1 ) {
621622 return Status::Invalid (" MAP-annotated groups must have a single child." );
622623 }
623- if (group.is_repeated ()) {
624+ if (group.is_repeated () &&
625+ (group.parent () == nullptr || !group.parent ()->logical_type ()->is_list ())) {
624626 return Status::Invalid (" MAP-annotated groups must not be repeated." );
625627 }
626628
@@ -651,7 +653,9 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
651653 return ListToSchemaField (group, current_levels, ctx, parent, out);
652654 }
653655
654- current_levels.Increment (group);
656+ if (group.is_optional ()) {
657+ current_levels.IncrementOptional ();
658+ }
655659 int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated ();
656660
657661 out->children .resize (1 );
@@ -694,17 +698,76 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
694698 return Status::OK ();
695699}
696700
701+ Status ResolveList (const GroupNode& group, const Node& list_node,
702+ LevelInfo current_levels, SchemaTreeContext* ctx,
703+ const SchemaField* out, SchemaField* child_field) {
704+ auto check_two_level_list_repeated = [](const GroupNode& group) -> Status {
705+ // When it is repeated, the LIST-annotated 2-level structure can only serve as an
706+ // element within another LIST-annotated 2-level structure.
707+ if (group.is_repeated () &&
708+ (group.parent () == nullptr || !group.parent ()->logical_type ()->is_list ())) {
709+ return Status::Invalid (" LIST-annotated groups must not be repeated." );
710+ }
711+ return {};
712+ };
713+
714+ if (list_node.is_group ()) {
715+ const auto & list_group = static_cast <const GroupNode&>(list_node);
716+ if (list_group.field_count () > 1 ) {
717+ // The inner type of the list should be a struct when there are multiple fields
718+ // in the repeated group
719+ RETURN_NOT_OK (check_two_level_list_repeated (group));
720+ return GroupToStruct (list_group, current_levels, ctx, out, child_field);
721+ }
722+ if (list_group.field_count () == 0 ) {
723+ return Status::Invalid (" Group must have at least one child." );
724+ }
725+
726+ if (list_group.logical_type ()->is_none () && HasListElementName (list_group, group)) {
727+ // Backward-compatibility rule 4
728+ RETURN_NOT_OK (check_two_level_list_repeated (group));
729+ return GroupToStruct (list_group, current_levels, ctx, out, child_field);
730+ }
731+
732+ const auto & repeated_field = list_group.field (0 );
733+ if (!list_group.logical_type ()->is_none () || repeated_field->is_repeated ()) {
734+ RETURN_NOT_OK (check_two_level_list_repeated (group));
735+ if (list_group.logical_type ()->is_list ()) {
736+ return ListToSchemaField (list_group, current_levels, ctx, out, child_field);
737+ } else if (list_group.logical_type ()->is_map ()) {
738+ return MapToSchemaField (list_group, current_levels, ctx, out, child_field);
739+ } else {
740+ return GroupToStruct (list_group, current_levels, ctx, out, child_field);
741+ }
742+ }
743+
744+ if (group.is_repeated ()) {
745+ return Status::Invalid (" LIST-annotated groups must not be repeated." );
746+ }
747+
748+ return NodeToSchemaField (*repeated_field, current_levels, ctx, out, child_field);
749+ }
750+
751+ RETURN_NOT_OK (check_two_level_list_repeated (group));
752+ const auto & primitive_node = static_cast <const PrimitiveNode&>(list_node);
753+ int column_index = ctx->schema ->GetColumnIndex (primitive_node);
754+ ARROW_ASSIGN_OR_RAISE (std::shared_ptr<ArrowType> type,
755+ GetTypeForNode (column_index, primitive_node, ctx));
756+ auto item_field = ::arrow::field (list_node.name (), type, /* nullable=*/ false ,
757+ FieldIdMetadata (list_node.field_id ()));
758+ return PopulateLeaf (column_index, item_field, current_levels, ctx, out, child_field);
759+ }
760+
697761Status ListToSchemaField (const GroupNode& group, LevelInfo current_levels,
698762 SchemaTreeContext* ctx, const SchemaField* parent,
699763 SchemaField* out) {
700764 if (group.field_count () != 1 ) {
701765 return Status::Invalid (" LIST-annotated groups must have a single child." );
702766 }
703- if (group.is_repeated ()) {
704- return Status::Invalid (" LIST-annotated groups must not be repeated." );
705- }
706767
707- current_levels.Increment (group);
768+ if (group.is_optional ()) {
769+ current_levels.IncrementOptional ();
770+ }
708771
709772 out->children .resize (group.field_count ());
710773 SchemaField* child_field = &out->children [0 ];
@@ -720,88 +783,8 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
720783 }
721784
722785 int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated ();
723- if (list_node.is_group ()) {
724- const auto & list_group = static_cast <const GroupNode&>(list_node);
725- if (list_group.field_count () > 1 ) {
726- // The inner type of the list should be a struct when there are multiple fields
727- // in the repeated group
728- RETURN_NOT_OK (GroupToStruct (list_group, current_levels, ctx, out, child_field));
729- } else if (list_group.field_count () == 1 ) {
730- const auto & repeated_field = list_group.field (0 );
731- if (repeated_field->is_repeated ()) {
732- // Special case where the inner type might be a list with two-level encoding
733- // like below:
734- //
735- // required/optional group name=SOMETHING (LIST) {
736- // repeated group array (LIST) {
737- // repeated TYPE item;
738- // }
739- // }
740- //
741- // yields list<item: list<item: TYPE not null> not null> ?nullable
742- if (!list_group.logical_type ()->is_list ()) {
743- return Status::Invalid (" Group with one repeated child must be LIST-annotated." );
744- }
745- // LIST-annotated group with three-level encoding cannot be repeated.
746- if (repeated_field->is_group ()) {
747- auto & repeated_group_field = static_cast <const GroupNode&>(*repeated_field);
748- if (repeated_group_field.field_count () == 0 ) {
749- return Status::Invalid (" LIST-annotated groups must have at least one child." );
750- }
751- if (!repeated_group_field.field (0 )->is_repeated ()) {
752- return Status::Invalid (" LIST-annotated groups must not be repeated." );
753- }
754- }
755- RETURN_NOT_OK (
756- NodeToSchemaField (*repeated_field, current_levels, ctx, out, child_field));
757- } else if (HasListElementName (list_group, group)) {
758- // We distinguish the special case that we have
759- //
760- // required/optional group name=SOMETHING {
761- // repeated group name=array or $SOMETHING_tuple {
762- // required/optional TYPE item;
763- // }
764- // }
765- //
766- // The inner type of the list should be a struct rather than a primitive value
767- //
768- // yields list<item: struct<item: TYPE ?nullable> not null> ?nullable
769- RETURN_NOT_OK (GroupToStruct (list_group, current_levels, ctx, out, child_field));
770- } else {
771- // Resolve 3-level encoding
772- //
773- // required/optional group name=whatever {
774- // repeated group name=list {
775- // required/optional TYPE item;
776- // }
777- // }
778- //
779- // yields list<item: TYPE ?nullable> ?nullable
780- RETURN_NOT_OK (
781- NodeToSchemaField (*repeated_field, current_levels, ctx, out, child_field));
782- }
783- } else {
784- return Status::Invalid (" Group must have at least one child." );
785- }
786- } else {
787- // Two-level list encoding
788- //
789- // required/optional group LIST {
790- // repeated TYPE;
791- // }
792- //
793- // TYPE is a primitive type
794- //
795- // yields list<item: TYPE not null> ?nullable
796- const auto & primitive_node = static_cast <const PrimitiveNode&>(list_node);
797- int column_index = ctx->schema ->GetColumnIndex (primitive_node);
798- ARROW_ASSIGN_OR_RAISE (std::shared_ptr<ArrowType> type,
799- GetTypeForNode (column_index, primitive_node, ctx));
800- auto item_field = ::arrow::field (list_node.name (), type, /* nullable=*/ false ,
801- FieldIdMetadata (list_node.field_id ()));
802- RETURN_NOT_OK (
803- PopulateLeaf (column_index, item_field, current_levels, ctx, out, child_field));
804- }
786+ RETURN_NOT_OK (ResolveList (group, list_node, current_levels, ctx, out, child_field));
787+
805788 ARROW_ASSIGN_OR_RAISE (auto list_type,
806789 MakeArrowList (child_field->field , ctx->properties ));
807790 out->field = ::arrow::field (group.name (), std::move (list_type), group.is_optional (),
0 commit comments