Skip to content

Commit 82fea79

Browse files
committed
FPM-566 add support for infilling from a different data source
1 parent 5de8f2b commit 82fea79

File tree

1 file changed

+26
-7
lines changed

1 file changed

+26
-7
lines changed

src/time_stream/infill.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from abc import ABC, abstractmethod
1616
from dataclasses import dataclass
1717
from datetime import datetime
18-
from typing import Any
18+
from typing import Any, Optional
1919

2020
import numpy as np
2121
import polars as pl
@@ -366,29 +366,48 @@ class AltData(InfillMethod):
366366

367367
def __init__(self, alt_data_column: str, correction_factor: float = 1.0):
    """Set up the alternative-data infill method.

    Args:
        alt_data_column: Name of the column that provides the alternative data.
        correction_factor: Optional factor to apply to the alternative data.
            Defaults to 1.0.
    """
    # Store both settings as given; no validation is performed here.
    self.alt_data_column, self.correction_factor = alt_data_column, correction_factor
376376

377-
def _fill(self, df: pl.DataFrame, infill_column: str, alt_df: Optional[pl.DataFrame] = None) -> pl.DataFrame:
    """Fill missing values in ``infill_column`` using the alternative data column.

    The alternative data is read either from ``df`` itself (when ``alt_df`` is
    None) or from a separate DataFrame that is left-joined onto ``df`` by the
    ``time`` column.

    Args:
        df: The DataFrame to infill.
        infill_column: The column to infill.
        alt_df: Optional DataFrame containing the alternative data. It must
            contain a ``time`` column and ``self.alt_data_column``. Defaults to
            None, in which case the alternative column is read from ``df``, so
            callers using the two-argument form keep working.

    Returns:
        pl.DataFrame with infilled values. When ``alt_df`` is supplied, the
        temporarily joined alternative column is dropped from the result.

    Raises:
        ValueError: If ``alt_df`` is supplied but ``df`` already has a column
            named ``self.alt_data_column``.
    """
    use_separate_frame = alt_df is not None

    if use_separate_frame:
        check_columns_in_dataframe(alt_df, ['time', self.alt_data_column])

        # Refuse to join when the name is already taken in the main frame: the
        # joined column would otherwise be renamed with the suffix and the
        # expression below would silently read the wrong column.
        if self.alt_data_column in df.columns:
            raise ValueError(f"Column {self.alt_data_column} already exists in the main dataframe.")

        # Left join keeps every row of df and pulls in only the alternative column.
        # NOTE(review): duplicate 'time' values in alt_df would duplicate rows of
        # df here - confirm alt_df is unique on 'time' upstream.
        df = df.join(
            alt_df.select(['time', self.alt_data_column]),
            on='time',
            how="left",
            suffix="_alt",
        )
    else:
        check_columns_in_dataframe(df, [self.alt_data_column])

    # Where the target is null take the (scaled) alternative value, otherwise
    # keep the original value.
    infilled = df.with_columns(
        pl.when(pl.col(infill_column).is_null())
        .then(pl.col(self.alt_data_column) * self.correction_factor)
        .otherwise(pl.col(infill_column))
        .alias(self._infilled_column_name(infill_column))
    )

    # The alternative column was only joined in as a working column; remove it
    # so the caller gets back the original columns plus the infilled one.
    if use_separate_frame:
        infilled = infilled.drop(self.alt_data_column)

    return infilled

0 commit comments

Comments
 (0)