Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create season object and related accessor #54

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions hdc/algo/accessors.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
"""Xarray Accesor classes."""

from typing import Iterable, List, Optional, Union
from typing import Iterable, List, Optional, Tuple, Union
from warnings import warn

from dask import is_dask_collection
import dask.array as da
from dask.base import tokenize
import numpy as np
import xarray
from dask import is_dask_collection
from dask.base import tokenize

from . import ops
from .dekad import Dekad
from .season import Season
from .utils import get_calibration_indices, to_linspace

__all__ = [
"Anomalies",
"Dekad",
"Season",
"IterativeAggregation",
"PixelAlgorithms",
"WhittakerSmoother",
Expand Down Expand Up @@ -134,6 +136,26 @@ class DekadPeriod(Period):
_period_cls = Dekad


@xarray.register_dataset_accessor("season")
@xarray.register_dataarray_accessor("season")
class SeasonPeriod(Period):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we will be able to inherit from Period, as those objects map time onto a global and mutually exclusive set of segments, while Seasons don't (i.e., 2024/10/7 will always be 202410d1, but it will fall in different seasons depending on which season range is relevant).

In practical terms, all the properties in the Period class won't work for seasons, as they rely on a _period_cls that can be instantiated without additional arguments; but for Season we need to pass the season_range.

We can inherit from AccessorTimeBase though.

"""Accessor class for handling seasonal indexing of an xarray object."""

def label(self, season_ranges: List[Tuple[int, int]]) -> "xarray.DataArray":
    """
    Compute a seasonal label (e.g., '2021-01') for each time step in the xarray object.

    The wrapped object itself is not modified and no coordinate is attached
    to it here: only the computed labels are returned, so the caller decides
    how to use them (for instance as a coordinate or a group-by key).

    Args:
        season_ranges (List[Tuple[int, int]]): List of (start, end) dekads defining seasons.

    Returns:
        xarray.DataArray: One label per time step, as produced by
        ``Season.season_label``; time steps outside every season range carry
        that method's "no match" value.
    """
    season = Season(season_ranges=season_ranges)

    # NOTE(review): assumes self._tseries is a pandas Series of timestamps
    # (it is used through .apply() and .to_xarray()) -- confirm against the
    # accessor base class.
    labels = self._tseries.apply(season.season_label)
    return labels.to_xarray()


class IterativeAggregation(AccessorBase):
"""Class to aggregate multiple coordinate slices."""

Expand Down Expand Up @@ -489,10 +511,7 @@ def spi(
)

# pylint: disable=import-outside-toplevel
from .ops.stats import (
gammastd_yxt,
gammastd_grp,
)
from .ops.stats import gammastd_grp, gammastd_yxt

tix = self._obj.get_index("time")

Expand Down Expand Up @@ -649,10 +668,7 @@ def autocorr(self):
def mktrend(self):
"""Calculate the Mann-Kendall trend along the time dimension."""
# pylint: disable=import-outside-toplevel
from .ops.stats import (
_mann_kendall_trend_gu,
_mann_kendall_trend_gu_nd,
)
from .ops.stats import _mann_kendall_trend_gu, _mann_kendall_trend_gu_nd

nodata = self._obj.attrs.get("nodata", None)
if nodata is None:
Expand Down
138 changes: 138 additions & 0 deletions hdc/algo/season.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""Season helper class."""

from datetime import date, datetime
from typing import List, Optional, Tuple, Union

import pandas as pd
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pandas is not a dependency in hdc-algo


from .dekad import Dekad


class Season:
"""
Handles season-based indexing and labeling for time series data using dekads.
"""

def __init__(self, season_ranges: List[Tuple[int, int]]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably what we want (to mirror the approach in Dekad) is:

class Season:

    def __init__(self, date: Union[str, int, datetime, date, Dekad], season_range: List[Tuple[int, int]]):
        # Some code to convert the flexible date input into an integer-based representation of the season
        # For example 201501 could be 2015 season 1.
        self._seas = seas
        # Also store the season_range to allow operations and property calculations
        self.season_range = season_range

"""
Initialize the Season class with user-defined season ranges.

Args:
season_ranges (List[Tuple[int, int]]): List of (start_dekad, end_dekad) tuples defining seasons.
"""
self.season_ranges = season_ranges
self.validate_season_ranges()

def __repr__(self):
    """Return a debug string showing the season ranges and their lengths in dekads."""
    ranges = self.season_ranges
    lengths = self.season_lengths
    return f"Season(season_ranges={ranges}, season_lengths={lengths})"

def __hash__(self):
    """Hash the instance by its season ranges, taken as an immutable tuple."""
    ranges_key = tuple(self.season_ranges)
    return hash(ranges_key)

def __eq__(self, other):
    """
    Compare two Season objects by their season ranges.

    Returns ``NotImplemented`` for objects that are not ``Season`` instances
    so Python can try the reflected comparison on the other operand; when
    that also declines, ``==`` evaluates to False and ``!=`` to True, which
    is the same outcome callers saw before.
    """
    if not isinstance(other, Season):
        return NotImplemented
    return self.season_ranges == other.season_ranges

def __len__(self) -> int:
    """Return the number of season ranges held by this object."""
    ranges = self.season_ranges
    return len(ranges)

@property
def season_lengths(self) -> List[int]:
    """
    Length, in dekads, of every season range.

    A range whose start is not after its end counts the dekads from start to
    end inclusive. A range whose start is after its end is treated as
    crossing the year boundary: the dekads from start up to dekad 36 plus
    the dekads from 1 up to end.

    Returns:
        List[int]: One length per season range, in the same order.
    """
    return [
        (last - first + 1) if first <= last else ((36 - first + 1) + last)
        for first, last in self.season_ranges
    ]

@property
def raw(self) -> List[Tuple[int, int]]:
    """Return the stored season ranges as-is (the same list object, not a copy)."""
    ranges = self.season_ranges
    return ranges

def season_index(self, dekad_of_year: int) -> Optional[int]:
    """
    Look up the 1-based index of the season containing a dekad of the year.

    Ranges are scanned in their stored order and the first one that contains
    the dekad wins. A range whose start is after its end is treated as
    wrapping over the year boundary.

    Args:
        dekad_of_year (int): Dekad index (1-36).

    Returns:
        int: Season index (1 for the first range, 2 for the second, ...) or
        None if the dekad is not inside any range.
    """
    for position, (first, last) in enumerate(self.season_ranges, start=1):
        if first <= last:
            # Plain range: both bounds must hold.
            inside = first <= dekad_of_year <= last
        else:
            # Wrap-around range: either the tail of the year or its head.
            inside = dekad_of_year >= first or dekad_of_year <= last
        if inside:
            return position
    return None

def season_label(self, date: Union[datetime, date]) -> Optional[str]:
    """
    Return the season label (e.g., '2021-01', '2021-02') for the provided date.

    The label is ``<year>-<season index>`` with the index zero-padded to two
    digits. For cross-year seasons (start dekad after end dekad, e.g.
    Oct-May), the label uses the **starting year**: dates in the part of the
    season that falls after the turn of the year are labelled with the
    previous calendar year.

    Args:
        date (datetime or date): Input date.

    Returns:
        str: Season label (e.g., '2021-01'), or None if the date does not
        fall inside any season range.
    """
    season_idx = self.idx(date)
    if season_idx is None:
        # Plain None matches the Optional[str] annotation and keeps this
        # module free of pandas; pandas.isna(None) is still True for callers
        # that test the result that way.
        return None

    # season_idx is 1-based and refers to the first range containing the
    # date's dekad, so the matching range can be read back directly instead
    # of scanning every range a second time.
    start, end = self.season_ranges[season_idx - 1]
    dekad = Dekad(date).yidx

    # In a wrap-around range a dekad before `start` can only be in the
    # leading part of the following year, so the season began a year earlier.
    if start > end and dekad < start:
        return f"{date.year - 1}-{season_idx:02d}"
    return f"{date.year}-{season_idx:02d}"

def idx(self, date: Union[datetime, date]) -> Optional[int]:
    """
    Return the season index for a calendar date.

    The date is converted to its dekad-of-year via ``Dekad(date).yidx`` and
    then resolved with ``season_index``.

    Args:
        date (datetime or date): Input date.

    Returns:
        int: Season index or None if no match.
    """
    dekad_of_year = Dekad(date).yidx
    return self.season_index(dekad_of_year)

def validate_season_ranges(self):
    """
    Ensure the season ranges are valid and mutually exclusive.

    Every start and end must be a dekad in [1, 36]. A range whose start is
    after its end is treated as wrapping over the year boundary (start..36
    followed by 1..end). Two ranges overlap when they share at least one
    dekad of the year; comparing the actual dekads covered gives the right
    answer for wrap-around ranges as well, which a comparison of the raw
    start/end numbers does not.

    Raises:
        ValueError: If a bound is outside [1, 36], or if two ranges share a
            dekad.
    """

    def _dekads(start, end):
        """Return the set of dekads of the year covered by (start, end)."""
        if start <= end:
            return set(range(start, end + 1))
        return set(range(start, 37)) | set(range(1, end + 1))

    ranges = list(self.season_ranges)

    # Check all bounds first so the dekad sets below are always well defined.
    for start, end in ranges:
        if not (1 <= start <= 36 and 1 <= end <= 36):
            raise ValueError(
                f"Invalid season range: ({start}, {end}). Dekads must be in [1, 36]."
            )

    covered = [_dekads(start, end) for start, end in ranges]

    for i, current in enumerate(ranges):
        for j in range(i + 1, len(ranges)):
            if covered[i].isdisjoint(covered[j]):
                continue
            reported, other = current, ranges[j]
            # Name the plain (non-wrapping) range first when it conflicts
            # with a wrap-around one; this keeps the wording the existing
            # tests expect for such pairs.
            if reported[0] > reported[1] and other[0] <= other[1]:
                reported, other = other, reported
            raise ValueError(
                f"Season range ({reported[0]}, {reported[1]}) overlaps with "
                f"({other[0]}, {other[1]})."
            )
82 changes: 82 additions & 0 deletions tests/test_season.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from datetime import datetime

import pandas as pd

from hdc.algo.season import Season

# pylint: disable=use-implicit-booleaness-not-comparison


def test_season():
# Test initialization and basic attributes
season = Season([(1, 10), (11, 20), (21, 30)])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's discuss the implementation approach. Here a Season instance is an object that can map dates to specific seasons in time. Compare this to the Dekad class, where each instance is actually a specific Dekad in time. I think we want to replicate the latter approach.


# Check season lengths
assert len(season) == 3
assert season.season_lengths == [10, 10, 10]

# Test raw season ranges
assert season.raw == [(1, 10), (11, 20), (21, 30)]

# Test the string representation and hashing
assert (
repr(season)
== "Season(season_ranges=[(1, 10), (11, 20), (21, 30)], season_lengths=[10, 10, 10])"
)
assert hash(season) == hash(tuple(season.season_ranges))

# Test equality
season_2 = Season([(1, 10), (11, 20), (21, 30)])
assert season == season_2
assert season != Season([(1, 10), (11, 20)])
assert season != Season([(1, 5), (6, 10), (11, 15)])

# Test season index (dekad of the year)
assert season.season_index(5) == 1
assert season.season_index(15) == 2
assert season.season_index(25) == 3
assert season.season_index(36) is None
assert season.season_index(37) is None

# Test season idx method
assert season.idx(datetime(2022, 3, 10)) == 1
assert season.idx(datetime(2022, 6, 15)) == 2
assert season.idx(datetime(2022, 8, 25)) == 3
assert season.idx(datetime(2022, 12, 3)) is None

# Test season label
assert season.season_label(datetime(2022, 3, 10)) == "2022-01"
assert season.season_label(datetime(2022, 6, 15)) == "2022-02"
assert season.season_label(datetime(2022, 8, 25)) == "2022-03"
assert pd.isna(season.season_label(datetime(2022, 12, 3)))

# Test cross-year season handling
season_cross_year = Season([(27, 15)])
assert len(season_cross_year) == 1
assert season_cross_year.season_lengths == [36 - 27 + 1 + 15]
assert season_cross_year != Season([(15, 27)])
assert season_cross_year.season_index(30) == 1
assert season_cross_year.season_index(3) == 1
assert season_cross_year.season_index(22) is None
assert season_cross_year.idx(datetime(2024, 11, 12)) == 1
assert season_cross_year.idx(datetime(2025, 3, 28)) == 1
assert season_cross_year.idx(datetime(2024, 6, 1)) is None
assert season_cross_year.season_label(datetime(2024, 11, 12)) == "2024-01"
assert season_cross_year.season_label(datetime(2025, 3, 28)) == "2024-01"
assert pd.isna(season_cross_year.season_label(datetime(2024, 6, 1)))

# Test invalid season ranges (should raise ValueError).
# Each check fails explicitly when no exception is raised; a bare
# try/except would otherwise pass silently if validation stopped rejecting
# the input.
try:
    Season([(1, 10), (5, 15)])  # Overlapping ranges
except ValueError as e:
    assert str(e) == "Season range (1, 10) overlaps with (5, 15)."
else:
    raise AssertionError("Season([(1, 10), (5, 15)]) did not raise ValueError")

try:
    Season([(30, 5), (5, 15)])  # Overlapping ranges
except ValueError as e:
    assert str(e) == "Season range (5, 15) overlaps with (30, 5)."
else:
    raise AssertionError("Season([(30, 5), (5, 15)]) did not raise ValueError")

try:
    Season([(40, 20)])  # Invalid dekad range
except ValueError as e:
    assert str(e) == "Invalid season range: (40, 20). Dekads must be in [1, 36]."
else:
    raise AssertionError("Season([(40, 20)]) did not raise ValueError")
Loading