Skip to content

Commit aee3f68

Browse files
author
Mrutunjay Kinagi
committed
fix(table): validate snapshot timestamp drift on add snapshot (#2938)
1 parent 9de7deb commit aee3f68

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

pyiceberg/table/update/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from pyiceberg.table import Transaction
5858

5959
U = TypeVar("U")
60+
ONE_MINUTE_MS = 60_000
6061

6162

6263
class UpdateTableMetadata(ABC, Generic[U]):
@@ -442,6 +443,19 @@ def _(update: AddSnapshotUpdate, base_metadata: TableMetadata, context: _TableMe
442443
f"Cannot add snapshot with sequence number {update.snapshot.sequence_number} "
443444
f"older than last sequence number {base_metadata.last_sequence_number}"
444445
)
446+
elif (
447+
base_metadata.snapshot_log
448+
and update.snapshot.timestamp_ms - base_metadata.snapshot_log[-1].timestamp_ms < -ONE_MINUTE_MS
449+
):
450+
raise ValueError(
451+
f"Invalid snapshot timestamp {update.snapshot.timestamp_ms}: "
452+
f"before last snapshot timestamp {base_metadata.snapshot_log[-1].timestamp_ms}"
453+
)
454+
elif update.snapshot.timestamp_ms - base_metadata.last_updated_ms < -ONE_MINUTE_MS:
455+
raise ValueError(
456+
f"Invalid snapshot timestamp {update.snapshot.timestamp_ms}: "
457+
f"before last updated timestamp {base_metadata.last_updated_ms}"
458+
)
445459
elif base_metadata.format_version >= 3 and update.snapshot.first_row_id is None:
446460
raise ValueError("Cannot add snapshot without first row id")
447461
elif (

tests/table/test_init.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,40 @@ def test_update_metadata_add_snapshot(table_v2: Table) -> None:
828828
assert new_metadata.last_updated_ms == new_snapshot.timestamp_ms
829829

830830

831+
def test_update_metadata_add_snapshot_rejects_old_timestamp_vs_snapshot_log(table_v2: Table) -> None:
832+
oldest_allowed_snapshot_ts = table_v2.metadata.snapshot_log[-1].timestamp_ms - 60_000
833+
new_snapshot = Snapshot(
834+
snapshot_id=25,
835+
parent_snapshot_id=19,
836+
sequence_number=200,
837+
timestamp_ms=oldest_allowed_snapshot_ts - 1,
838+
manifest_list="s3:/a/b/c.avro",
839+
summary=Summary(Operation.APPEND),
840+
schema_id=3,
841+
)
842+
843+
with pytest.raises(ValueError, match="before last snapshot timestamp"):
844+
update_table_metadata(table_v2.metadata, (AddSnapshotUpdate(snapshot=new_snapshot),))
845+
846+
847+
def test_update_metadata_add_snapshot_rejects_old_timestamp_vs_last_updated(table_v2: Table) -> None:
848+
# Clear snapshot-log to isolate the last_updated_ms guard.
849+
base_metadata = table_v2.metadata.model_copy(update={"snapshot_log": []})
850+
oldest_allowed_snapshot_ts = base_metadata.last_updated_ms - 60_000
851+
new_snapshot = Snapshot(
852+
snapshot_id=25,
853+
parent_snapshot_id=19,
854+
sequence_number=200,
855+
timestamp_ms=oldest_allowed_snapshot_ts - 1,
856+
manifest_list="s3:/a/b/c.avro",
857+
summary=Summary(Operation.APPEND),
858+
schema_id=3,
859+
)
860+
861+
with pytest.raises(ValueError, match="before last updated timestamp"):
862+
update_table_metadata(base_metadata, (AddSnapshotUpdate(snapshot=new_snapshot),))
863+
864+
831865
def test_update_metadata_set_ref_snapshot(table_v2: Table) -> None:
832866
update, _ = table_v2.transaction()._set_ref_snapshot(
833867
snapshot_id=3051729675574597004,

0 commit comments

Comments
 (0)