Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DynamicEarthNet dataset #2527

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ Digital Typhoon

.. autoclass:: DigitalTyphoon

Dynamic EarthNet
^^^^^^^^^^^^^^^^

.. autoclass:: DynamicEarthNet

ETCI2021 Flood Detection
^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`DeepGlobe Land Cover`_,S,DigitalGlobe +Vivid,-,803,7,"2,448x2,448",0.5,RGB
`DFC2022`_,S,Aerial,"CC-BY-4.0","3,981",15,"2,000x2,000",0.5,RGB
`Digital Typhoon`_,"C, R",Himawari,"CC-BY-4.0","189,364",8,512,5000,Infrared
`Dynamic EarthNet`_,S,"Planet, Sentinel-1, Sentinel-2","CC-BY-SA-4.0","54,750",7,"1,024x1,024",3--60,"RGB, NIR, MSI"
`ETCI2021 Flood Detection`_,S,Sentinel-1,-,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"MIT","27,000",10,64x64,10,MSI
`FAIR1M`_,OD,Gaofen/Google Earth,"CC-BY-NC-SA-3.0","15,000",37,"1,024x1,024",0.3--0.8,RGB
Expand Down
212 changes: 212 additions & 0 deletions tests/data/dynamic_earthnet/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import calendar
import hashlib
import os
import shutil
import tarfile
from datetime import datetime

import numpy as np
import pandas as pd
import rasterio

# NOTE: this value is never used. `directories` is rebound by a second
# assignment further down the module, which is the one main() reads.
directories = {'planet', 'sentinel1', 'sentinel2', 'metadata'}

Check failure on line 15 in tests/data/dynamic_earthnet/data.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

tests/data/dynamic_earthnet/data.py:6:1: I001 Import block is un-sorted or un-formatted

# Two dummy tiles. 'planet_path' and 'label_path' are written with a leading
# slash; 'date' is the 'YYYY-MM' month for which daily rasters are generated.
# NOTE(review): the second label filename uses hyphens and says 2018-01-01
# while its 'date' is 2019-02, and the first uses underscores - confirm this
# mismatch mirrors the real dataset before changing it.
samples = [
    {
        'planet_path': '/10N/26E-183N/1330_3107_13/',
        'label_path': '/labels/1330_3107_13_10N/Labels/Raster/10N-121W-39N-L3H-SR/10N-121W-39N-L3H-SR-2018_01_01.tif',
        'date': '2018-01',
    },
    {
        'planet_path': '/17N/9E-42N/2196_3885_13/',
        'label_path': '/labels/2196_3885_13_17N/Labels/Raster/17N-83W-9N-L3H-SR/17N-83W-9N-L3H-SR-2018-01-01.tif',
        'date': '2019-02',
    },
]


# Number of bands written to every dummy label raster (see main()).
NUM_CLASSES = 7

# Height and width, in pixels, of every raster written by create_dummy_tiff().
SIZE = 32

# Output directories that main() deletes before regenerating the fixtures.
directories = {'planet', 'sentinel1', 'sentinel2', 'split_info', 'labels'}

# Split names. NOTE(review): not referenced by the code visible in this
# script (create_split_files() spells the splits out itself) - confirm it is
# still needed.
splits = ['train', 'val', 'test']

# Planet product sub-directories. NOTE(review): only referenced from
# commented-out code in main() - confirm it is still needed.
planet_dirs = ['PF-SR', 'PF-QA']


def create_dummy_tiff(filepath: str, bands: int, label_mode: bool = False) -> None:
    """Write a random SIZE x SIZE GeoTIFF with ``bands`` bands to ``filepath``.

    Args:
        filepath: Destination of the raster. Missing parent directories are
            created first.
        bands: Number of bands to write.
        label_mode: If True, write uint8 pixels drawn from {0, 255}; otherwise
            write int16 pixels drawn uniformly from [0, 255).
    """
    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when the path actually has a directory component.
    parent = os.path.dirname(filepath)
    if parent:
        os.makedirs(parent, exist_ok=True)

    shape = (bands, SIZE, SIZE)
    if label_mode:
        dtype = np.uint8
        data = np.random.choice([0, 255], size=shape).astype(dtype)
    else:
        dtype = np.int16
        # The upper bound of randint is exclusive, so values stay below 255.
        data = np.random.randint(0, 255, size=shape, dtype=dtype)

    with rasterio.open(
        filepath,
        'w',
        driver='GTiff',
        height=SIZE,
        width=SIZE,
        count=bands,
        dtype=dtype,
        crs='+proj=latlong',
        compress='lzw',
    ) as dst:
        dst.write(data)


def get_days_in_month(date_str: str) -> int:
    """Return the number of days in the month named by a 'YYYY-MM' string.

    Args:
        date_str: Year and month formatted as 'YYYY-MM', e.g. '2018-01'.

    Returns:
        The number of days in that month of that year.

    Raises:
        ValueError: If ``date_str`` does not match the '%Y-%m' format.
    """
    date = datetime.strptime(date_str, '%Y-%m')
    # monthrange() returns (weekday of the 1st, number of days); only the
    # second element is needed.
    _, num_days = calendar.monthrange(date.year, date.month)
    return num_days


def create_split_files() -> pd.DataFrame:
    """Write the per-split text files and the ``splits.parquet`` index.

    For each of 'train', 'val' and 'test' a ``split_info/<split>.txt`` file is
    written with one space-separated ``planet_path label_path date`` line per
    sample. The same rows, extended with the ``missing_*`` flags and the
    Sentinel-1/Sentinel-2 paths, are saved to ``split_info/splits.parquet``.

    Returns:
        The DataFrame that was written to the parquet file.
    """
    os.makedirs('split_info', exist_ok=True)

    # 'train' holds both dummy samples; 'val' and 'test' hold one each.
    samples_by_split = {
        'train': [samples[0], samples[1]],
        'val': [samples[0]],
        'test': [samples[1]],
    }

    records = []
    # The loop variable gets its own name so that it no longer rebinds the
    # dict being iterated.
    for split, members in samples_by_split.items():
        lines = []
        for sample in members:
            planet_path = f'{sample["planet_path"]}/PF-SR'
            lines.append(f'{planet_path} {sample["label_path"]} {sample["date"]}')
            records.append(
                {
                    'split': split,
                    'planet_path': planet_path,
                    'label_path': sample['label_path'],
                    'year_month': sample['date'],
                }
            )

        with open(os.path.join('split_info', f'{split}.txt'), 'w') as f:
            f.write('\n'.join(lines))

    def _s1_path(row: pd.Series) -> str:
        # Element 3 of the slash-split planet path is the tile directory.
        tile = row['planet_path'].split('/')[3]
        return f'sentinel1/{tile}/{tile}_{row["year_month"].replace("-", "_")}.tif'

    def _s2_path(row: pd.Series) -> str:
        # NOTE(review): unlike _s1_path this embeds the whole planet path
        # (slashes included) instead of tile + date. It looks unintentional,
        # but it is kept as-is because the committed fixtures were generated
        # with it - confirm before changing.
        tile = row['planet_path'].split('/')[3]
        return f'sentinel2/{tile}/{row["planet_path"].replace("-", "_")}.tif'

    df = pd.DataFrame(records)
    df['missing_label'] = False
    df['missing_s1'] = False
    df['missing_s2'] = False
    # Both helpers must run before 'planet_path' gets its 'planet' prefix
    # below, because they index into the un-prefixed path.
    df['s1_path'] = df.apply(_s1_path, axis=1)
    df['s2_path'] = df.apply(_s2_path, axis=1)
    df['planet_path'] = df['planet_path'].apply(lambda x: f'planet{x}')
    df['label_path'] = df['label_path'].apply(lambda x: x.lstrip('/'))
    df.to_parquet(os.path.join('split_info', 'splits.parquet'))

    return df


def main() -> None:
    """Regenerate the dummy DynamicEarthNet fixtures and print tarball md5s.

    Removes any previously generated output directories, writes the split
    files, creates one dummy raster per day and modality for every split row,
    packs the results into per-modality ``.tar.gz`` archives and prints the md5
    checksum of each archive.
    """
    # Start from a clean slate so stale files never end up in the tarballs.
    for directory in directories:
        if os.path.exists(directory):
            shutil.rmtree(directory)

    df = create_split_files()

    # A sample listed in several splits is simply written again.
    for _, row in df.iterrows():
        num_days = pd.Period(row['year_month']).days_in_month
        planet_path = row['planet_path'].lstrip('/')
        label_path = row['label_path'].lstrip('/')
        s1_path = row['s1_path']
        s2_path = row['s2_path']

        # Each *_path is used as a directory holding one raster per day.
        for day in range(1, num_days + 1):
            date = f'{row["year_month"]}-{day:02d}'

            # 1. Planet data (4 bands)
            create_dummy_tiff(os.path.join(planet_path, f'{date}.tif'), bands=4)

            # 2. Sentinel-1 data (8 bands)
            create_dummy_tiff(os.path.join(s1_path, f'{date}.tif'), bands=8)

            # 3. Sentinel-2 data (12 bands)
            create_dummy_tiff(os.path.join(s2_path, f'{date}.tif'), bands=12)

            # 4. Labels (NUM_CLASSES bands, each pixel 0 or 255)
            create_dummy_tiff(
                os.path.join(label_path, f'{date}.tif'),
                bands=NUM_CLASSES,
                label_mode=True,
            )

    # 5. One tarball per modality: (archive name, top directory, sub-directory)
    tar_info = [
        ('planet_pf_sr.tar.gz', 'planet', 'PF-SR'),
        ('planet_pf_qa.tar.gz', 'planet', 'PF-QA'),
        ('sentinel1.tar.gz', 'sentinel1', None),
        ('sentinel2.tar.gz', 'sentinel2', None),
        ('labels.tar.gz', 'labels', None),
        ('split_info.tar.gz', 'split_info', None),
    ]

    for tar_name, top_dir, sub_dir in tar_info:
        with tarfile.open(tar_name, 'w:gz') as tar:
            if sub_dir:
                # Planet data: add only the requested product sub-directory of
                # each sample. The added path already starts with top_dir, so
                # the 'planet/' prefix is kept inside the archive.
                # NOTE: nothing above writes PF-QA rasters, so that archive
                # stays empty unless such directories already exist.
                for sample in samples:
                    add_dir = os.path.join(
                        top_dir, sample['planet_path'].lstrip('/'), sub_dir
                    )
                    if os.path.exists(add_dir):
                        tar.add(add_dir)
            elif os.path.exists(top_dir):
                # Every other modality: add the whole directory.
                tar.add(top_dir)

        # Print the checksum of the finished archive.
        with open(tar_name, 'rb') as f:
            md5 = hashlib.md5(f.read()).hexdigest()
        print(f'{tar_name}: {md5}')


if __name__ == '__main__':
    main()
Binary file added tests/data/dynamic_earthnet/labels.tar.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/data/dynamic_earthnet/planet_pf_qa.tar.gz
Binary file not shown.
Binary file added tests/data/dynamic_earthnet/planet_pf_sr.tar.gz
Binary file not shown.
Binary file added tests/data/dynamic_earthnet/sentinel1.tar.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/data/dynamic_earthnet/sentinel2.tar.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/data/dynamic_earthnet/split_info.tar.gz
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/dynamic_earthnet/split_info/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/17N/9E-42N/2196_3885_13//PF-SR /labels/2196_3885_13_17N/Labels/Raster/17N-83W-9N-L3H-SR/17N-83W-9N-L3H-SR-2018-01-01.tif 2019-02
2 changes: 2 additions & 0 deletions tests/data/dynamic_earthnet/split_info/train.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/10N/26E-183N/1330_3107_13//PF-SR /labels/1330_3107_13_10N/Labels/Raster/10N-121W-39N-L3H-SR/10N-121W-39N-L3H-SR-2018_01_01.tif 2018-01
/17N/9E-42N/2196_3885_13//PF-SR /labels/2196_3885_13_17N/Labels/Raster/17N-83W-9N-L3H-SR/17N-83W-9N-L3H-SR-2018-01-01.tif 2019-02
1 change: 1 addition & 0 deletions tests/data/dynamic_earthnet/split_info/val.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/10N/26E-183N/1330_3107_13//PF-SR /labels/1330_3107_13_10N/Labels/Raster/10N-121W-39N-L3H-SR/10N-121W-39N-L3H-SR-2018_01_01.tif 2018-01
125 changes: 125 additions & 0 deletions tests/datasets/test_dynamic_earthnet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from itertools import product
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch

from torchgeo.datasets import DatasetNotFoundError, DynamicEarthNet


class TestDynamicEarthNet:
    @pytest.fixture(params=product(['train'], ['monthly', 'weekly', 'daily']))
    def dataset(
        self, tmp_path: Path, monkeypatch: MonkeyPatch, request: SubRequest
    ) -> DynamicEarthNet:
        # ClassVar is only valid in a class body and was never imported in
        # this module, so the local mapping uses the plain dict type.
        filename_and_md5: dict[str, dict[str, str]] = {
            'planet': {
                'filename': 'planet_pf_sr.tar.gz',
                'md5': 'c52b33928598ab33d3c29d45dcc7a908',
            },
            'sentinel1': {
                'filename': 'sentinel1.tar.gz',
                'md5': '282d7643b2ceef1790a46ab4b689227b',
            },
            'sentinel2': {
                'filename': 'sentinel2.tar.gz',
                'md5': 'd4161a1cd35f65e277a7b69d7afcc3f0',
            },
            'labels': {
                'filename': 'labels.tar.gz',
                'md5': 'f2c35402e5719320ad2fd74621f63f6c',
            },
            'split_info': {
                'filename': 'split_info.tar.gz',
                'md5': '1b4eadb3048eb2225324f1f65245feb8',
            },
        }
        # Point the dataset at the local dummy tarballs instead of the
        # real download location.
        monkeypatch.setattr(DynamicEarthNet, 'filename_and_md5', filename_and_md5)
        url = os.path.join('tests', 'data', 'dynamic_earthnet', '{}')
        monkeypatch.setattr(DynamicEarthNet, 'base_url', url)
        split, temporal_input = request.param
        transforms = nn.Identity()
        return DynamicEarthNet(
            tmp_path,
            split,
            temporal_input=temporal_input,
            transforms=transforms,
            checksum=False,
            download=True,
        )

    def test_getitem(self, dataset: DynamicEarthNet) -> None:
        x = dataset[0]
        assert isinstance(x, dict)
        assert isinstance(x['image'], torch.Tensor)
        assert isinstance(x['mask'], torch.Tensor)

        # The leading image dimension depends on the temporal sampling mode.
        if dataset.temporal_inputs == 'monthly':
            assert x['image'].shape[0] == 1
        elif dataset.temporal_inputs == 'weekly':
            assert x['image'].shape[0] == 6
        elif dataset.temporal_inputs == 'daily':
            assert x['image'].shape[0] >= 28

    def test_additional_modality(self, dataset: DynamicEarthNet) -> None:
        x = dataset[0]
        if 's1' in dataset.add_modalities:
            assert isinstance(x['s1_image'], torch.Tensor)
            assert x['s1_image'].shape[0] == 1
        if 's2' in dataset.add_modalities:
            assert isinstance(x['s2_image'], torch.Tensor)
            assert x['s2_image'].shape[0] == 13

    def test_len(self, dataset: DynamicEarthNet) -> None:
        if dataset.split == 'train':
            assert len(dataset) == 2
        else:
            assert len(dataset) == 1

    def test_extract(self, tmp_path: Path) -> None:
        # Copy the tarballs next to the dataset root so that construction
        # goes through extraction rather than download.
        root = os.path.join('tests', 'data', 'dynamic_earthnet')
        filenames = [
            'planet_pf_sr.tar.gz',
            'sentinel1.tar.gz',
            'sentinel2.tar.gz',
            'split_info.tar.gz',
            'labels.tar.gz',
        ]
        for filename in filenames:
            shutil.copyfile(
                os.path.join(root, filename), os.path.join(tmp_path, filename)
            )
        DynamicEarthNet(root=tmp_path)

    def test_corrupted(self, tmp_path: Path) -> None:
        with open(os.path.join(tmp_path, 'labels.tar.gz'), 'w') as f:
            f.write('bad')
        with pytest.raises(RuntimeError, match='Dataset found, but corrupted.'):
            DynamicEarthNet(root=tmp_path, checksum=True)

    def test_invalid_split(self) -> None:
        with pytest.raises(AssertionError):
            DynamicEarthNet(split='foo')

    def test_not_downloaded(self, tmp_path: Path) -> None:
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            DynamicEarthNet(tmp_path)

    def test_plot(self, dataset: DynamicEarthNet) -> None:
        x = dataset[0].copy()
        dataset.plot(x, suptitle='Test')
        plt.close()
        dataset.plot(x, show_titles=False)
        plt.close()
        # Reuse the mask as a prediction to exercise the prediction panel.
        x['prediction'] = x['mask'].clone()
        dataset.plot(x)
        plt.close()
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .deepglobelandcover import DeepGlobeLandCover
from .dfc2022 import DFC2022
from .digital_typhoon import DigitalTyphoon
from .dynamic_earthnet import DynamicEarthNet
from .eddmaps import EDDMapS
from .enviroatlas import EnviroAtlas
from .errors import DatasetNotFoundError, DependencyNotFoundError, RGBBandsMissingError
Expand Down Expand Up @@ -205,6 +206,7 @@
'DeepGlobeLandCover',
'DependencyNotFoundError',
'DigitalTyphoon',
'DynamicEarthNet',
'EDDMapS',
'EnviroAtlas',
'Esri2020',
Expand Down
Loading
Loading