Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 105 additions & 1 deletion crates/iceberg/src/io/object_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ impl ObjectCache {
let key = CachedObjectKey::ManifestList((
snapshot.manifest_list().to_string(),
table_metadata.format_version,
snapshot.schema_id().unwrap(),
snapshot
.schema_id()
.unwrap_or_else(|| table_metadata.current_schema_id()),
));
let cache_entry = self
.cache
Expand Down Expand Up @@ -406,4 +408,106 @@ mod tests {
"1.parquet"
);
}

/// Regression test: ObjectCache::get_manifest_list must not panic when
/// the snapshot has no schema_id (Iceberg v1 format tables).
#[tokio::test]
async fn test_get_manifest_list_v1_snapshot_without_schema_id() {
let tmp_dir = TempDir::new().unwrap();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are example above to use TestTableFixture to simplify the tests.

let table_location = tmp_dir.path().join("table1");
let manifest_list_location = table_location.join("metadata/manifests_list_1.avro");
let table_metadata_location = table_location.join("metadata/v1.json");

let file_io = FileIO::new_with_fs();

let template_json_str = fs::read_to_string(format!(
"{}/testdata/example_table_metadata_v1.json",
env!("CARGO_MANIFEST_DIR")
))
.unwrap();
let metadata_json = render_template(&template_json_str, context! {
table_location => &table_location,
manifest_list_location => &manifest_list_location,
table_metadata_location => &table_metadata_location,
});
let table_metadata: TableMetadata = serde_json::from_str(&metadata_json).unwrap();

let table = Table::builder()
.metadata(table_metadata)
.identifier(TableIdent::from_strs(["db", "table1"]).unwrap())
.file_io(file_io.clone())
.metadata_location(table_metadata_location.as_os_str().to_str().unwrap())
.build()
.unwrap();

let current_snapshot = table.metadata().current_snapshot().unwrap();

// Verify the snapshot has no schema_id (the condition that caused the panic)
assert!(current_snapshot.schema_id().is_none());

let current_schema = current_snapshot.schema(table.metadata()).unwrap();
let current_partition_spec = table.metadata().default_partition_spec();

// Write a manifest file
let manifest_output = table
.file_io()
.new_output(format!(
"{}/metadata/manifest_{}.avro",
table_location.to_str().unwrap(),
Uuid::new_v4()
))
.unwrap();

let mut writer = ManifestWriterBuilder::new(
manifest_output,
Some(current_snapshot.snapshot_id()),
None,
current_schema.clone(),
current_partition_spec.as_ref().clone(),
)
.build_v1();
writer
.add_entry(
ManifestEntry::builder()
.status(ManifestStatus::Added)
.data_file(
DataFileBuilder::default()
.partition_spec_id(0)
.content(DataContentType::Data)
.file_path(format!("{}/1.parquet", table_location.to_str().unwrap()))
.file_format(DataFileFormat::Parquet)
.file_size_in_bytes(100)
.record_count(1)
.partition(Struct::from_iter([Some(Literal::long(100))]))
.build()
.unwrap(),
)
.build(),
)
.unwrap();
let data_file_manifest = writer.write_manifest_file().await.unwrap();

// Write manifest list
let mut manifest_list_write = ManifestListWriter::v1(
table
.file_io()
.new_output(current_snapshot.manifest_list())
.unwrap(),
current_snapshot.snapshot_id(),
current_snapshot.parent_snapshot_id(),
);
manifest_list_write
.add_manifests(vec![data_file_manifest].into_iter())
.unwrap();
manifest_list_write.close().await.unwrap();

// This used to panic with: called `Option::unwrap()` on a `None` value
let object_cache = ObjectCache::new(table.file_io().clone());
let result = object_cache
.get_manifest_list(current_snapshot, &table.metadata_ref())
.await;

assert!(result.is_ok());
assert_eq!(result.unwrap().entries().len(), 1);
}
}
35 changes: 35 additions & 0 deletions crates/iceberg/testdata/example_table_metadata_v1.json
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a dir table_metadata under testdata, and we should move it to that dir. We also should rename it to TableMetadataV1SnapshotNoSchemaId

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"format-version": 1,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": "{{ table_location }}",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schema": {
"type": "struct",
"fields": [
{"id": 1, "name": "x", "required": true, "type": "long"}
]
},
"partition-spec": [
{
"name": "x",
"transform": "identity",
"source-id": 1,
"field-id": 1000
}
],
"properties": {},
"current-snapshot-id": 3051729675574597004,
"snapshots": [
{
"snapshot-id": 3051729675574597004,
"timestamp-ms": 1515100955770,
"summary": {"operation": "append"},
"manifest-list": "{{ manifest_list_location }}"
}
],
"snapshot-log": [
{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}
],
"metadata-log": [{"metadata-file": "{{ table_metadata_location }}", "timestamp-ms": 1515100}]
}
Loading