Skip to content

Commit

Permalink
Zarr: report COMPRESSOR and FILTERS in structural metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Mar 6, 2025
1 parent fe5ec96 commit e782e03
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 8 deletions.
Binary file added autotest/gdrivers/data/zarr/v3/gzip.zarr/gzip/c/0/0
Binary file not shown.
44 changes: 44 additions & 0 deletions autotest/gdrivers/data/zarr/v3/gzip.zarr/gzip/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"zarr_format":3,
"node_type":"array",
"shape":[
1,
2
],
"data_type":"uint8",
"chunk_grid":{
"name":"regular",
"configuration":{
"chunk_shape":[
1,
2
]
}
},
"chunk_key_encoding":{
"name":"default",
"configuration":{
"separator":"\/"
}
},
"fill_value":0,
"codecs":[
{
"name":"bytes",
"configuration":{
"endian":"little"
}
},
{
"name":"gzip",
"configuration":{
"level":6
}
}
],
"attributes":{},
"dimension_names":[
"Y",
"X"
]
}
5 changes: 5 additions & 0 deletions autotest/gdrivers/data/zarr/v3/gzip.zarr/zarr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"zarr_format": 3,
"node_type": "group",
"attributes": {}
}
30 changes: 30 additions & 0 deletions autotest/gdrivers/zarr_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,33 @@ def test_zarr_read_compression_methods(datasetname, compressor):
ar = rg.OpenMDArray(rg.GetMDArrayNames()[0])
assert ar
assert ar.Read() == array.array("b", [1, 2])
assert json.loads(ar.GetStructuralInfo()["COMPRESSOR"])["id"] == compressor


# Zarr V3 flavour: check reading different compression methods
@pytest.mark.parametrize(
    "datasetname,compressor",
    [("gzip.zarr", "gzip")],
)
def test_zarr_v3_read_compression_methods(datasetname, compressor):
    """Read a Zarr V3 test array and check its data and COMPRESSOR info.

    When ``compressor`` is not found in the driver's COMPRESSORS metadata
    item, the dataset is instead expected to fail to open.
    """

    path = "data/zarr/v3/" + datasetname
    supported = gdal.GetDriverByName("Zarr").GetMetadataItem("COMPRESSORS")

    if compressor in supported:
        ds = gdal.OpenEx(path, gdal.OF_MULTIDIM_RASTER)
        root = ds.GetRootGroup()
        assert root
        # The test dataset is opened through its first (index 0) array name.
        mdarray = root.OpenMDArray(root.GetMDArrayNames()[0])
        assert mdarray
        assert mdarray.Read() == array.array("b", [1, 2])
        # The structural info stores the codec as a JSON string; V3 codecs
        # are identified by their "name" member.
        codec = json.loads(mdarray.GetStructuralInfo()["COMPRESSOR"])
        assert codec["name"] == compressor
    else:
        # Codec not available in this build: opening must fail quietly.
        with gdal.quiet_errors():
            ds = gdal.OpenEx(path, gdal.OF_MULTIDIM_RASTER)
            assert ds is None


def test_zarr_read_shuffle_filter():
Expand All @@ -545,6 +572,9 @@ def test_zarr_read_shuffle_filter():
ar = rg.OpenMDArray(rg.GetMDArrayNames()[0])
assert ar
assert ar.Read() == array.array("h", [1, 2])
assert json.loads(ar.GetStructuralInfo()["FILTERS"]) == [
{"elementsize": 2, "id": "shuffle"}
]


def test_zarr_read_shuffle_filter_update(tmp_path):
Expand Down
21 changes: 13 additions & 8 deletions frmts/zarr/zarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,7 @@ class ZarrArray CPL_NON_FINAL : public GDALPamMDArray
const GDALExtendedDataType m_oType;
const std::vector<DtypeElt> m_aoDtypeElts;
const std::vector<GUInt64> m_anBlockSize;
CPLStringList m_aosStructuralInfo{};
CPLJSONObject m_dtype{};
GByte *m_pabyNoData = nullptr;
std::string m_osDimSeparator{"."};
Expand Down Expand Up @@ -903,6 +904,11 @@ class ZarrArray CPL_NON_FINAL : public GDALPamMDArray
return m_anBlockSize;
}

/** Return the structural metadata items recorded for this array.
 *
 * The returned list is the content of m_aosStructuralInfo, i.e. whatever
 * key/value pairs have been stored in that member (the Zarr loaders store
 * entries such as COMPRESSOR and FILTERS there).
 */
CSLConstList GetStructuralInfo() const override
{
return m_aosStructuralInfo.List();
}

const void *GetRawNoDataValue() const override
{
return m_pabyNoData;
Expand Down Expand Up @@ -1038,6 +1044,11 @@ class ZarrArray CPL_NON_FINAL : public GDALPamMDArray

bool CacheTilePresence();

/** Record a structural metadata item for this array.
 *
 * Forwards to CPLStringList::SetNameValue() on m_aosStructuralInfo, which
 * is the list exposed by GetStructuralInfo().
 *
 * @param pszKey   Item name (e.g. "COMPRESSOR").
 * @param pszValue Item value.
 */
void SetStructuralInfo(const char *pszKey, const char *pszValue)
{
m_aosStructuralInfo.SetNameValue(pszKey, pszValue);
}

static void DecodeSourceElt(const std::vector<DtypeElt> &elts,
const GByte *pSrc, GByte *pDst);

Expand Down Expand Up @@ -1099,10 +1110,7 @@ class ZarrV2Array final : public ZarrArray
const std::vector<DtypeElt> &aoDtypeElts,
const std::vector<GUInt64> &anBlockSize, bool bFortranOrder);

void SetCompressorJson(const CPLJSONObject &oCompressor)
{
m_oCompressorJSon = oCompressor;
}
void SetCompressorJson(const CPLJSONObject &oCompressor);

void SetCompressorDecompressor(const std::string &osDecompressorId,
const CPLCompressor *psComp,
Expand All @@ -1113,10 +1121,7 @@ class ZarrV2Array final : public ZarrArray
m_psDecompressor = psDecomp;
}

void SetFilters(const CPLJSONArray &oFiltersArray)
{
m_oFiltersArray = oFiltersArray;
}
void SetFilters(const CPLJSONArray &oFiltersArray);

void Flush() override;

Expand Down
24 changes: 24 additions & 0 deletions frmts/zarr/zarr_v2_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1964,3 +1964,27 @@ ZarrV2Group::LoadArray(const std::string &osArrayName,

return poArray;
}

/************************************************************************/
/* ZarrV2Array::SetCompressorJson() */
/************************************************************************/

void ZarrV2Array::SetCompressorJson(const CPLJSONObject &oCompressor)
{
    // Always remember the compressor definition as given.
    m_oCompressorJSon = oCompressor;

    // A compressor of JSON type Null is not reported in the structural
    // metadata.
    if (oCompressor.GetType() == CPLJSONObject::Type::Null)
        return;

    // Expose the serialized compressor definition as the COMPRESSOR item.
    const auto osCompressorJson = oCompressor.ToString();
    m_aosStructuralInfo.SetNameValue("COMPRESSOR", osCompressorJson.c_str());
}

/************************************************************************/
/* ZarrV2Array::SetFilters() */
/************************************************************************/

void ZarrV2Array::SetFilters(const CPLJSONArray &oFiltersArray)
{
    // Always remember the filter definitions as given.
    m_oFiltersArray = oFiltersArray;

    // An empty filter array is not reported in the structural metadata.
    if (oFiltersArray.Size() <= 0)
        return;

    // Expose the serialized filter array as the FILTERS item.
    const auto osFiltersJson = oFiltersArray.ToString();
    m_aosStructuralInfo.SetNameValue("FILTERS", osFiltersJson.c_str());
}
6 changes: 6 additions & 0 deletions frmts/zarr/zarr_v3_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1585,6 +1585,12 @@ ZarrV3Group::LoadArray(const std::string &osArrayName,
poArray->ParseSpecialAttributes(m_pSelf.lock(), oAttributes);
poArray->SetAttributes(oAttributes);
poArray->SetDtype(oDtype);
if (oCodecs.Size() > 0 &&
oCodecs[oCodecs.Size() - 1].GetString("name") != "bytes")
{
poArray->SetStructuralInfo(
"COMPRESSOR", oCodecs[oCodecs.Size() - 1].ToString().c_str());
}
if (poCodecs)
poArray->SetCodecs(std::move(poCodecs));
RegisterArray(poArray);
Expand Down
6 changes: 6 additions & 0 deletions frmts/zarr/zarr_v3_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,12 @@ std::shared_ptr<GDALMDArray> ZarrV3Group::CreateMDArray(
poArray->SetFilename(osFilename);
poArray->SetDimSeparator(pszDimSeparator);
poArray->SetDtype(dtype);
if (oCodecs.Size() > 0 &&
oCodecs[oCodecs.Size() - 1].GetString("name") != "bytes")
{
poArray->SetStructuralInfo(
"COMPRESSOR", oCodecs[oCodecs.Size() - 1].ToString().c_str());
}
if (poCodecs)
poArray->SetCodecs(std::move(poCodecs));
poArray->SetUpdatable(true);
Expand Down

0 comments on commit e782e03

Please sign in to comment.