diff --git a/audformat/core/table.py b/audformat/core/table.py index 30924953..3c9c21fe 100644 --- a/audformat/core/table.py +++ b/audformat/core/table.py @@ -586,6 +586,28 @@ def save( Existing files will be overwritten. + When using ``"parquet"`` as ``storage_format`` + a hash, + based on the content of the table, + is stored under the key ``b"hash"`` + in the metadata of the schema of the parquet file. + This provides a deterministic hash for the file, + as MD5 sums of parquet files, + containing identical information, + often differ. + Reasons include the library + that wrote the parquet file, + the chosen compression codec + and metadata written by the library. + + The hash can be accessed with ``pyarrow`` by:: + + pyarrow.parquet.read_schema(f"{path}.parquet").metadata[b"hash"].decode() + + The hash is used by :mod:`audb` + when publishing a database + to track changes of database files. + Args: path: file path without extension storage_format: storage format of table.