-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create season object and related accessor #54
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
"""Season helper class.""" | ||
|
||
from datetime import date, datetime | ||
from typing import List, Optional, Tuple, Union | ||
|
||
import pandas as pd | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pandas is not a dependency in hdc-algo. |
||
|
||
from .dekad import Dekad | ||
|
||
|
||
class Season:
    """
    Handles season-based indexing and labeling for time series data using dekads.

    A season is a ``(start_dekad, end_dekad)`` pair of dekad-of-year indices,
    both inclusive and both in [1, 36]. A pair with ``start > end`` describes a
    season that wraps across the year boundary (e.g. ``(27, 15)``).
    """

    # Number of dekads in a year; upper bound for valid dekad-of-year indices.
    _DEKADS_PER_YEAR = 36

    def __init__(self, season_ranges: List[Tuple[int, int]]):
        """
        Initialize the Season class with user-defined season ranges.

        Args:
            season_ranges (List[Tuple[int, int]]): List of (start_dekad, end_dekad) tuples defining seasons.

        Raises:
            ValueError: If a dekad lies outside [1, 36] or two ranges overlap.
        """
        # NOTE(review): a PR reviewer suggested reworking this constructor to
        # mirror the approach used in Dekad (the suggestion itself is cut off
        # in the review thread) - confirm the intended signature.
        self.season_ranges = season_ranges
        self.validate_season_ranges()

    def __repr__(self):
        return f"Season(season_ranges={self.season_ranges}, season_lengths={self.season_lengths})"

    def __hash__(self):
        return hash(tuple(self.season_ranges))

    def __eq__(self, other):
        if not isinstance(other, Season):
            return False
        return self.season_ranges == other.season_ranges

    def __len__(self) -> int:
        return len(self.season_ranges)

    @staticmethod
    def _contains(start: int, end: int, dekad_of_year: int) -> bool:
        """Return True if ``dekad_of_year`` falls inside the inclusive range (start, end)."""
        if start <= end:  # Normal range within a single year
            return start <= dekad_of_year <= end
        # Cross-year range: the tail of one year plus the head of the next
        return dekad_of_year >= start or dekad_of_year <= end

    @property
    def season_lengths(self) -> List[int]:
        """
        Returns the length (in dekads) of each season range.

        Returns:
            List[int]: Lengths of each season range (number of dekads in each season).
        """
        total = self._DEKADS_PER_YEAR
        return [
            end - start + 1 if start <= end else (total - start + 1) + end
            for start, end in self.season_ranges
        ]

    @property
    def raw(self) -> List[Tuple[int, int]]:
        """Returns the raw season representation."""
        return self.season_ranges

    def season_index(self, dekad_of_year: int) -> Optional[int]:
        """
        Returns the season index (e.g., 1, 2, etc.) for the given dekad of the year.

        Args:
            dekad_of_year (int): Dekad index (1-36).

        Returns:
            int: 1-based position of the first matching range, or None if no match.
        """
        for position, (start, end) in enumerate(self.season_ranges, start=1):
            if self._contains(start, end, dekad_of_year):
                return position
        return None

    def season_label(self, date: Union[datetime, date]) -> Optional[str]:
        """
        Returns the season label (e.g., '2021-01', '2021-02') for the provided date.

        For cross-year seasons (e.g., Oct-May), the label uses the **starting year**.

        Args:
            date (datetime or date): Input date.

        Returns:
            str: Season label (e.g., '2021-01') or NaT if no match.
        """
        dekad = Dekad(date).yidx
        season_idx = self.season_index(dekad)
        if season_idx is None:
            return pd.NaT

        start, end = self.season_ranges[season_idx - 1]
        # A date in the early-year part of a cross-year season belongs to the
        # season that started in the previous calendar year.
        in_wrapped_tail = start > end and dekad < start
        ref_year = date.year - 1 if in_wrapped_tail else date.year
        return f"{ref_year}-{season_idx:02d}"

    def idx(self, date: Union[datetime, date]) -> Optional[int]:
        """
        Apply season indexing for the provided date and return the season index.

        Args:
            date (datetime or date): Input date.

        Returns:
            int: Season index or None if no match.
        """
        return self.season_index(Dekad(date).yidx)

    def validate_season_ranges(self):
        """
        Ensures that the season ranges are valid (e.g., dekads are within 1-36) and do not overlap.

        Overlap is evaluated on the circular dekad-of-year axis, so ranges that
        wrap across the year boundary are compared correctly.

        Raises:
            ValueError: If a dekad lies outside [1, 36] or two ranges share a dekad.
        """
        ranges = list(self.season_ranges)
        upper = self._DEKADS_PER_YEAR

        for start, end in ranges:
            if not (1 <= start <= upper and 1 <= end <= upper):
                raise ValueError(
                    f"Invalid season range: ({start}, {end}). Dekads must be in [1, 36]."
                )

        for position, first in enumerate(ranges):
            for second in ranges[position + 1:]:
                # Two circular intervals share a dekad exactly when the start
                # of one of them lies inside the other.
                if self._contains(first[0], first[1], second[0]) or self._contains(
                    second[0], second[1], first[0]
                ):
                    # Report the pair in a stable order (smaller start first).
                    (start, end), (other_start, other_end) = sorted((first, second))
                    raise ValueError(
                        f"Season range ({start}, {end}) overlaps with ({other_start}, {other_end})."
                    )
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from datetime import datetime | ||
|
||
import pandas as pd | ||
|
||
from hdc.algo.season import Season | ||
|
||
# pylint: disable=use-implicit-booleaness-not-comparison | ||
|
||
|
||
def test_season():
    """Exercise Season construction, indexing, labeling and range validation."""
    # Test initialization and basic attributes
    # NOTE(review): a PR reviewer asked to discuss the implementation approach
    # for this constructor call (the comment is cut off in the review thread).
    season = Season([(1, 10), (11, 20), (21, 30)])

    # Check season lengths
    assert len(season) == 3
    assert season.season_lengths == [10, 10, 10]

    # Test raw season ranges
    assert season.raw == [(1, 10), (11, 20), (21, 30)]

    # Test the string representation and hashing
    assert (
        repr(season)
        == "Season(season_ranges=[(1, 10), (11, 20), (21, 30)], season_lengths=[10, 10, 10])"
    )
    assert hash(season) == hash(tuple(season.season_ranges))

    # Test equality
    season_2 = Season([(1, 10), (11, 20), (21, 30)])
    assert season == season_2
    assert season != Season([(1, 10), (11, 20)])
    assert season != Season([(1, 5), (6, 10), (11, 15)])

    # Test season index (dekad of the year)
    assert season.season_index(5) == 1
    assert season.season_index(15) == 2
    assert season.season_index(25) == 3
    assert season.season_index(36) is None
    assert season.season_index(37) is None

    # Test season idx method
    assert season.idx(datetime(2022, 3, 10)) == 1
    assert season.idx(datetime(2022, 6, 15)) == 2
    assert season.idx(datetime(2022, 8, 25)) == 3
    assert season.idx(datetime(2022, 12, 3)) is None

    # Test season label
    assert season.season_label(datetime(2022, 3, 10)) == "2022-01"
    assert season.season_label(datetime(2022, 6, 15)) == "2022-02"
    assert season.season_label(datetime(2022, 8, 25)) == "2022-03"
    assert pd.isna(season.season_label(datetime(2022, 12, 3)))

    # Test cross-year season handling
    season_cross_year = Season([(27, 15)])
    assert len(season_cross_year) == 1
    assert season_cross_year.season_lengths == [36 - 27 + 1 + 15]
    assert season_cross_year != Season([(15, 27)])
    assert season_cross_year.season_index(30) == 1
    assert season_cross_year.season_index(3) == 1
    assert season_cross_year.season_index(22) is None
    assert season_cross_year.idx(datetime(2024, 11, 12)) == 1
    assert season_cross_year.idx(datetime(2025, 3, 28)) == 1
    assert season_cross_year.idx(datetime(2024, 6, 1)) is None
    assert season_cross_year.season_label(datetime(2024, 11, 12)) == "2024-01"
    assert season_cross_year.season_label(datetime(2025, 3, 28)) == "2024-01"
    assert pd.isna(season_cross_year.season_label(datetime(2024, 6, 1)))

    # Test invalid season ranges (should raise ValueError).
    # The `else` branches make the test fail when no exception is raised;
    # without them a missing ValueError would pass silently.
    try:
        Season([(1, 10), (5, 15)])  # Overlapping ranges
    except ValueError as e:
        assert str(e) == "Season range (1, 10) overlaps with (5, 15)."
    else:
        raise AssertionError("Season([(1, 10), (5, 15)]) did not raise ValueError")

    try:
        Season([(30, 5), (5, 15)])  # Overlapping ranges
    except ValueError as e:
        assert str(e) == "Season range (5, 15) overlaps with (30, 5)."
    else:
        raise AssertionError("Season([(30, 5), (5, 15)]) did not raise ValueError")

    try:
        Season([(40, 20)])  # Invalid dekad range
    except ValueError as e:
        assert str(e) == "Invalid season range: (40, 20). Dekads must be in [1, 36]."
    else:
        raise AssertionError("Season([(40, 20)]) did not raise ValueError")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we will be able to inherit from `Period`, as those objects map time onto a global and mutually exclusive set of segments, while Seasons don't (i.e. 2024/10/7 will always be 202410d1, but it will fall in different seasons depending on the relevant season range).
In practical terms, none of the properties in the `Period` class will work for seasons, as they rely on a `_period_cls` that can be instantiated without additional arguments; for Season we need to pass the season_range.
We can inherit from `AccessorTimeBase`, though.