Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ __blobstorage__
*.bak2
# OS-specific .gitignores

# cargo insta temp files
*.pending-snap

# Mac .gitignore
# General
.DS_Store
Expand Down Expand Up @@ -99,4 +102,4 @@ parquet/pytest/venv/
__pycache__/

# Parquet file from arrow_reader_clickbench
hits_1.parquet
hits_1.parquet
1 change: 1 addition & 0 deletions arrow-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ all-features = true
[dev-dependencies]
bincode = { version = "1.3.3", default-features = false }
criterion = { version = "0.5", default-features = false }
insta = "1.43.1"

[[bench]]
name = "ffi"
Expand Down
13 changes: 13 additions & 0 deletions arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1175,4 +1175,17 @@ mod tests {
let data_type: DataType = "UInt64".parse().unwrap();
assert_eq!(data_type, DataType::UInt64);
}

// Snapshot test pinning the pretty-printed `Debug` output of a list `DataType`
// built with `DataType::new_list(DataType::Int8, false)`.
// The expected inline snapshot shows the list's child `Field` with only its
// `data_type` entry.
// NOTE(review): presumably the child field is named "item" and is non-nullable
// here, which is why no `name`/`nullable` entries appear — confirm against
// `DataType::new_list`.
#[test]
#[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
fn test_debug_format_field() {
// Make sure the `Debug` formatting of `DataType` is readable and not too long
insta::assert_debug_snapshot!(DataType::new_list(DataType::Int8, false), @r"
List(
Field {
data_type: Int8,
},
)
");
}
}
60 changes: 59 additions & 1 deletion arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub type FieldRef = Arc<Field>;
///
/// Arrow Extension types, are encoded in `Field`s metadata. See
/// [`Self::try_extension_type`] to retrieve the [`ExtensionType`], if any.
#[derive(Clone, Debug)]
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
name: String,
Expand All @@ -60,6 +60,46 @@ pub struct Field {
metadata: HashMap<String, String>,
}

// Hand-written `Debug` for `Field` that keeps the output short by omitting
// entries whose value is uninformative:
//   - `name` is skipped when it equals "item",
//   - `nullable` and `dict_is_ordered` are skipped when `false`,
//   - `dict_id` is skipped when `0`,
//   - `metadata` is skipped when empty.
// `data_type` is always printed.
impl std::fmt::Debug for Field {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
#![expect(deprecated)] // Must still print dict_id, if set
// Destructure without `..` so that every field of `Field` is named here;
// adding a field to the struct then forces this impl to be revisited.
let Self {
name,
data_type,
nullable,
dict_id,
dict_is_ordered,
metadata,
} = self;

// Build the output through the standard `debug_struct` helper so that both
// `{:?}` and the pretty `{:#?}` forms are handled by the formatter.
let mut s = f.debug_struct("Field");

if name != "item" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes sense not to print the "standard" item naame

// Keep it short when debug-formatting `DataType::List`
s.field("name", name);
}

// The data type is the one entry that is printed unconditionally.
s.field("data_type", data_type);

// Only printed when `true`.
if *nullable {
s.field("nullable", nullable);
}

// Only printed when non-zero.
if *dict_id != 0 {
s.field("dict_id", dict_id);
}

// Only printed when `true`.
if *dict_is_ordered {
s.field("dict_is_ordered", dict_is_ordered);
}

// Only printed when at least one metadata entry is present.
if !metadata.is_empty() {
s.field("metadata", metadata);
}
s.finish()
}
}

// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
// into comparison. However, these properties are only used in IPC context
// for matching dictionary encoded data. They are not necessary to be same
Expand Down Expand Up @@ -914,6 +954,24 @@ mod test {
Field::new_dict(s, DataType::Int64, false, 4, false);
}

// Snapshot test pinning the pretty-printed `Debug` output of `Field` for two cases:
//   1. a field named "item" created with `false` as the last argument, whose
//      expected snapshot contains only `data_type`;
//   2. a field named "column" created with `true` as the last argument, whose
//      expected snapshot contains `name`, `data_type` and `nullable: true`.
#[test]
#[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
fn test_debug_format_field() {
// Make sure the `Debug` formatting of `Field` is readable and not too long
insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
Field {
data_type: UInt8,
}
");
// The second snapshot uses `r#"…"#` because the expected text contains a `"`.
insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
Field {
name: "column",
data_type: LargeUtf8,
nullable: true,
}
"#);
}

#[test]
fn test_merge_incompatible_types() {
let mut field = Field::new("c1", DataType::Int64, false);
Expand Down
Loading