
Commit 9cb9a78

FPM-581: Add percentile aggregation calculation (#35)
1 parent 1ec356c commit 9cb9a78

File tree

5 files changed (+209, -10 lines)


docs/source/user_guide/aggregation.rst

Lines changed: 6 additions & 0 deletions
@@ -177,6 +177,12 @@ Choose how values inside each window are summarised. Pass a **string** correspon
 
    Common in hydrology for annual maxima (AMAX) or flood frequency analysis.
 
+- ``"percentile"`` - **The 'nth' percentile value for the period.**
+
+   Useful for capturing extremes within a given period, such as the 5th or 95th percentile of streamflow.
+   The percentile value to be calculated is provided as an integer parameter (p) from 0 to 100 (inclusive).
+
+
 Column selection
 ----------------
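For illustration, a call using the new option might look like the sketch below. Only the "percentile" name and the integer p parameter come from this commit; the TimeFrame variable, the column name and the period string are hypothetical, loosely modelled on the aggregate() signature touched in src/time_stream/base.py further down.

    # Hypothetical usage sketch; only aggregation_function="percentile" and p come from this commit.
    annual_p95 = tf.aggregate(
        aggregation_period="P1Y",           # assumed period string
        aggregation_function="percentile",  # aggregation name registered by this change
        columns="flow",                     # hypothetical data column
        p=95,                               # integer percentile, forwarded to the Percentile function
    )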

src/time_stream/aggregation.py

Lines changed: 39 additions & 1 deletion
@@ -72,7 +72,12 @@ def apply(
         """
         ctx = AggregationCtx(df, time_name, time_anchor, periodicity)
         pipeline = AggregationPipeline(
-            self, ctx, aggregation_period, columns, aggregation_time_anchor, missing_criteria
+            self,
+            ctx,
+            aggregation_period,
+            columns,
+            aggregation_time_anchor,
+            missing_criteria,
         )
         return pipeline.execute()

@@ -364,3 +369,36 @@ def expr(self, ctx: AggregationCtx, columns: list[str]) -> list[pl.Expr]:
             ]
         )
         return expressions
+
+
+@AggregationFunction.register
+class Percentile(AggregationFunction):
+    """An aggregation class to find the nth percentile of values within each aggregation period."""
+
+    name = "percentile"
+
+    def __init__(self, p: int):
+        """Initialise the Percentile aggregation.
+
+        Args:
+            p: The integer percentile value to apply, from 0 to 100 (inclusive).
+        """
+        super().__init__()
+        self.p = p
+
+    def expr(self, ctx: AggregationCtx, columns: list[str]) -> list[pl.Expr]:
+        """Return the Polars expressions for calculating the percentile."""
+        # Validate that p is an integer from 0 to 100, then divide by 100 to get the quantile equivalent.
+        if not isinstance(self.p, int) or not (0 <= self.p <= 100):
+            raise ValueError("The percentile value must be provided as an integer value from 0 to 100")
+
+        quantile = self.p / 100
+
+        expressions = [pl.col(col).quantile(quantile).alias(f"{self.name}_{col}") for col in columns]
+        return expressions
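As a minimal standalone sketch of what this expression produces, assuming a toy Polars frame with hypothetical "year" and "flow" columns (the period grouping and missing-data handling of the real pipeline are omitted): p=95 becomes quantile(0.95), and each selected column yields a percentile_<column> output.

    import polars as pl

    # Toy data; the column names are hypothetical and not part of this commit.
    df = pl.DataFrame({
        "year": [2020, 2020, 2020, 2021, 2021, 2021],
        "flow": [1.0, 5.0, 9.0, 2.0, 4.0, 8.0],
    })

    p = 95
    quantile = p / 100  # same integer-percentile to quantile conversion as Percentile.expr

    # Mirrors the generated expression: pl.col(col).quantile(quantile).alias(f"percentile_{col}")
    result = df.group_by("year").agg(
        pl.col("flow").quantile(quantile).alias("percentile_flow")
    )
    print(result.sort("year"))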

src/time_stream/base.py

Lines changed: 3 additions & 1 deletion
@@ -509,6 +509,7 @@ def aggregate(
         columns: str | list[str] | None = None,
         missing_criteria: tuple[str, float | int] | None = None,
         aggregation_time_anchor: TimeAnchor | None = None,
+        **kwargs,
     ) -> Self:
         """Apply an aggregation function to a column in this TimeFrame, check the aggregation satisfies user
         requirements and return a new derived TimeFrame containing the aggregated data.
@@ -519,12 +520,13 @@
             columns: The column(s) containing the data to be aggregated. If omitted, will use all data columns.
             missing_criteria: How the aggregation handles missing data
             aggregation_time_anchor: The time anchor for the aggregation result.
+            **kwargs: Parameters specific to the aggregation function.
 
         Returns:
             A TimeFrame containing the aggregated data.
         """
         # Get the aggregation function instance and run the apply method
-        agg_func = AggregationFunction.get(aggregation_function)
+        agg_func = AggregationFunction.get(aggregation_function, **kwargs)
         aggregation_period = configure_period_object(aggregation_period)
         aggregation_time_anchor = TimeAnchor(aggregation_time_anchor) if aggregation_time_anchor else self.time_anchor
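The base.py change is what lets an extra keyword such as p=95 travel from TimeFrame.aggregate to the Percentile constructor. The real AggregationFunction.get and register are not part of this diff; the snippet below is only an assumed shape for a name-keyed registry, included to show why forwarding **kwargs at the call site is enough.

    # Illustrative sketch only: the actual AggregationFunction.get/register are not shown in this commit.
    class AggregationFunction:
        _registry: dict[str, type["AggregationFunction"]] = {}
        name = ""

        @classmethod
        def register(cls, subclass: type["AggregationFunction"]) -> type["AggregationFunction"]:
            # Used as a class decorator, e.g. @AggregationFunction.register above Percentile.
            cls._registry[subclass.name] = subclass
            return subclass

        @classmethod
        def get(cls, name: str, **kwargs) -> "AggregationFunction":
            # Forwarding **kwargs here is why aggregate(..., p=95) ends up constructing Percentile(p=95).
            return cls._registry[name](**kwargs)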