|
15 | 15 | from abc import ABC, abstractmethod |
16 | 16 | from dataclasses import dataclass |
17 | 17 | from datetime import datetime |
18 | | -from typing import Any |
| 18 | +from typing import Any, Optional |
19 | 19 |
|
20 | 20 | import numpy as np |
21 | 21 | import polars as pl |
@@ -366,29 +366,48 @@ class AltData(InfillMethod): |
366 | 366 |
|
367 | 367 | def __init__(self, alt_data_column: str, correction_factor: float = 1.0): |
368 | 368 | """Initialize the alternative data infill method. |
369 | | - |
| 369 | +
|
370 | 370 | Args: |
371 | 371 | alt_data_column: The name of the column providing the alternative data. |
372 | 372 | correction_factor: An optional correction factor to apply to the alternative data. |
373 | 373 | """ |
374 | 374 | self.alt_data_column = alt_data_column |
375 | 375 | self.correction_factor = correction_factor |
376 | 376 |
|
def _fill(
    self,
    df: pl.DataFrame,
    infill_column: str,
    alt_df: Optional[pl.DataFrame] = None,
) -> pl.DataFrame:
    """Fill missing values using data from the alternative column.

    Null entries of ``infill_column`` are replaced by the alternative value
    multiplied by ``self.correction_factor``; non-null entries are kept. The
    result is written to the column named by
    ``self._infilled_column_name(infill_column)``.

    Args:
        df: The DataFrame to infill.
        infill_column: The column to infill.
        alt_df: Optional DataFrame containing the alternative data. When
            given, it is checked for the 'time' and alternative data columns
            and left-joined onto ``df`` on 'time'. When None (the default),
            the alternative data column is read from ``df`` itself, so
            existing two-argument callers keep working.

    Returns:
        pl.DataFrame with infilled values. When ``alt_df`` is supplied, the
        joined alternative data column is dropped again before returning.

    Raises:
        ValueError: If ``alt_df`` is supplied but ``df`` already contains a
            column named ``self.alt_data_column``.
    """
    use_external = alt_df is not None

    if use_external:
        check_columns_in_dataframe(alt_df, ["time", self.alt_data_column])

        # Refuse to join if the name is already taken: the expression below
        # reads the column by its plain name and must see the joined data.
        if self.alt_data_column in df.columns:
            raise ValueError(f"Column {self.alt_data_column} already exists in the main dataframe.")

        # After the guard above the only shared column is the join key, so no
        # suffix is ever applied and none is passed.
        # NOTE(review): duplicate 'time' values in alt_df would duplicate rows
        # of df in a left join, and row order after a join may depend on the
        # polars version - confirm alt_df is unique on 'time'.
        df = df.join(
            alt_df.select(["time", self.alt_data_column]),
            on="time",
            how="left",
        )
    else:
        check_columns_in_dataframe(df, [self.alt_data_column])

    infilled = df.with_columns(
        pl.when(pl.col(infill_column).is_null())
        .then(pl.col(self.alt_data_column) * self.correction_factor)
        .otherwise(pl.col(infill_column))
        .alias(self._infilled_column_name(infill_column))
    )

    # The alternative column was only borrowed from alt_df; do not leak it
    # into the caller's frame.
    if use_external:
        infilled = infilled.drop(self.alt_data_column)

    return infilled
0 commit comments