Skip to content

Commit e8a30b7

Browse files
Add coverage for uuid shredding in objects
1 parent a63c1c0 commit e8a30b7

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

parquet-variant-compute/src/shred_variant.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,173 @@ mod tests {
948948
assert!(value_field3.is_null(0)); // fully shredded, no remaining fields
949949
}
950950

951+
/// Shreds a six-row variant array against a struct schema whose `id` and
/// `session_id` fields are both `FixedSizeBinary(16)`, then checks which
/// column each piece of data ended up in.
///
/// Input rows: (0) both fields are UUIDs, (1) both UUIDs plus an extra `name`
/// field, (2) `session_id` absent, (3) `session_id` is a string, (4) `id` is an
/// `i64`, (5) a null row.
#[test]
fn test_uuid_shredding_in_objects() {
    // Downcasts an array expression to a concrete array type, panicking if the
    // runtime type does not match.
    macro_rules! downcast {
        ($array:expr, $target:ty) => {
            $array.as_any().downcast_ref::<$target>().unwrap()
        };
    }

    let uuid_a = Uuid::new_v4();
    let uuid_b = Uuid::new_v4();
    let uuid_c = Uuid::new_v4();

    let mut rows = VariantArrayBuilder::new(6);

    // Row 0: object holding exactly the two UUID fields
    rows.new_object()
        .with_field("id", uuid_a)
        .with_field("session_id", uuid_b)
        .finish();

    // Row 1: both UUID fields plus one field that is not in the target schema
    rows.new_object()
        .with_field("id", uuid_b)
        .with_field("session_id", uuid_c)
        .with_field("name", "test_user")
        .finish();

    // Row 2: `session_id` is absent
    rows.new_object().with_field("id", uuid_a).finish();

    // Row 3: `session_id` holds a string instead of a UUID
    rows.new_object()
        .with_field("id", uuid_c)
        .with_field("session_id", "not-a-uuid")
        .finish();

    // Row 4: `id` holds an integer instead of a UUID
    rows.new_object()
        .with_field("id", 12345i64)
        .with_field("session_id", uuid_a)
        .finish();

    // Row 5: null
    rows.append_null();

    let input = rows.build();

    let shredded_columns: Vec<Field> = ["id", "session_id"]
        .iter()
        .map(|name| Field::new(*name, DataType::FixedSizeBinary(16), true))
        .collect();
    let target_schema = DataType::Struct(Fields::from(shredded_columns));

    let shredded = shred_variant(&input, &target_schema).unwrap();

    assert!(shredded.value_field().is_some());
    assert!(shredded.typed_value_field().is_some());
    assert_eq!(shredded.len(), 6);

    let metadata = shredded.metadata_field();
    let residual = shredded.value_field().unwrap();
    let typed_struct = downcast!(
        shredded.typed_value_field().unwrap(),
        arrow::array::StructArray
    );

    // Pull the per-field (value, typed_value) pairs out of the typed_value struct
    let id_column =
        ShreddedVariantFieldArray::try_new(typed_struct.column_by_name("id").unwrap()).unwrap();
    let session_column =
        ShreddedVariantFieldArray::try_new(typed_struct.column_by_name("session_id").unwrap())
            .unwrap();

    let id_fallback = downcast!(id_column.value_field().unwrap(), BinaryViewArray);
    let id_shredded = downcast!(id_column.typed_value_field().unwrap(), FixedSizeBinaryArray);
    let session_fallback = downcast!(session_column.value_field().unwrap(), BinaryViewArray);
    let session_shredded = downcast!(
        session_column.typed_value_field().unwrap(),
        FixedSizeBinaryArray
    );

    // Top level: rows 0-4 are valid with a valid typed_value struct; row 5 is null
    for row in 0..5 {
        assert!(shredded.is_valid(row), "row {row}");
        assert!(typed_struct.is_valid(row), "row {row}");
    }
    assert!(shredded.is_null(5));

    // Top-level `value`: null wherever the object has no fields beyond the schema
    for row in [0, 2, 3, 4] {
        assert!(residual.is_null(row), "row {row}");
    }

    // Row 1 keeps its unshredded "name" field in the top-level `value`
    assert!(residual.is_valid(1));
    let leftover = match Variant::new(metadata.value(1), residual.value(1)) {
        Variant::Object(fields) => fields,
        _ => panic!("Expected object"),
    };
    assert_eq!(leftover.get("name"), Some(Variant::from("test_user")));

    // `id`: rows 0-3 carry a UUID in typed_value and nothing in value
    for (row, expected) in [uuid_a, uuid_b, uuid_a, uuid_c].iter().enumerate() {
        assert!(id_fallback.is_null(row), "row {row}");
        assert!(id_shredded.is_valid(row), "row {row}");
        assert_eq!(id_shredded.value(row), expected.as_bytes(), "row {row}");
    }

    // `id`, row 4: the i64 does not match the target type, so it stays in value
    assert!(id_fallback.is_valid(4));
    assert!(id_shredded.is_null(4));
    assert_eq!(
        Variant::new(metadata.value(4), id_fallback.value(4)),
        Variant::from(12345i64)
    );

    // `session_id`: rows 0, 1 and 4 carry a UUID in typed_value and nothing in value
    for (row, expected) in [(0, uuid_b), (1, uuid_c), (4, uuid_a)] {
        assert!(session_fallback.is_null(row), "row {row}");
        assert!(session_shredded.is_valid(row), "row {row}");
        assert_eq!(
            session_shredded.value(row),
            expected.as_bytes(),
            "row {row}"
        );
    }

    // `session_id`, row 2: the field is missing, so both columns are null
    assert!(session_fallback.is_null(2));
    assert!(session_shredded.is_null(2));

    // `session_id`, row 3: the string does not match the target type, so it stays in value
    assert!(session_fallback.is_valid(3));
    assert!(session_shredded.is_null(3));
    assert_eq!(
        Variant::new(metadata.value(3), session_fallback.value(3)),
        Variant::from("not-a-uuid")
    );
}
1117+
9511118
#[test]
9521119
fn test_spec_compliance() {
9531120
let input = create_test_variant_array(vec![

0 commit comments

Comments
 (0)