diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml
index 9fd9a84..f3df270 100644
--- a/.github/workflows/test-all.yaml
+++ b/.github/workflows/test-all.yaml
@@ -13,6 +13,14 @@ jobs:
       - name: Test Python 3.9
         run: make test_3_9
 
+  test-py-3-9-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Test Python 3.9 (validation)
+        run: make test_validation_3_9
+
   test-py-3-10:
     runs-on: ubuntu-latest
     steps:
@@ -21,6 +29,14 @@ jobs:
       - name: Test Python 3.10
         run: make test_3_10
 
+  test-py-3-10-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Test Python 3.10 (validation)
+        run: make test_validation_3_10
+
   test-py-3-11:
     runs-on: ubuntu-latest
     steps:
@@ -28,3 +44,11 @@ jobs:
 
       - name: Test Python 3.11
         run: make test_3_11
+
+  test-py-3-11-validation:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Test Python 3.11 (validation)
+        run: make test_validation_3_11
diff --git a/Makefile b/Makefile
index 78edb67..db9039a 100644
--- a/Makefile
+++ b/Makefile
@@ -44,6 +44,14 @@ test_3_11: build_3_11 ## Test Python 3.11 pickle
 	docker run -i --rm -v ${PWD}:${PWD} -v /tmp:/tmp ${NAME}_py_3_11:latest --model ${PWD}/tests/models/model_3_11_legacy.pkl
 	docker run -i --rm -v ${PWD}:${PWD} -v /tmp:/tmp ${NAME}_py_3_11:latest --model ${PWD}/tests/models/model_3_11.pkl
 
+test_validation_%: build_% ## Test validation dataset
+	docker run -i --rm -v ${PWD}:${PWD} -v /tmp:/tmp ${NAME}_py_$*:latest \
+		--dataset v4.3/validation_int8.parquet --benchmarks v4.3/validation_benchmark_models.parquet \
+		--model ${PWD}/tests/models/model_$*_legacy.pkl
+
+.PHONY: test_validation
+test_validation: test_validation_3_9 test_validation_3_10 test_validation_3_11
+
 .PHONY: push_latest
 push_latest: push_latest_3_9 push_latest_3_10 push_latest_3_11 ## Push latest docker containers
 
diff --git a/predict.py b/predict.py
index bd7529f..a93fe1e 100644
--- a/predict.py
+++ b/predict.py
@@ -183,37 +183,53 @@ def main(args):
     if num_args > 1:
         benchmark_models = get_data(args.benchmarks, args.output_dir)
 
-    logging.info(f"Predicting on {len(live_features)} rows of live features")
+    num_eras = live_features["era"].nunique()
+
+    if num_eras > 1:
+        logging.info(
+            f"Predicting on {len(live_features)} rows, {num_eras} eras of features"
+        )
+    else:
+        logging.info(f"Predicting on {len(live_features)} rows of live features")
+
     try:
-        if num_args == 1:
-            predictions = model(live_features)
-        elif num_args == 2:
-            predictions = model(live_features, benchmark_models)
-        else:
-            logging.error(
-                f"Invalid pickle function - {model_pkl} must have 1 or 2 arguments"
-            )
-            exit_with_help(1)
-
-        if predictions is None:
-            logging.error("Pickle function is invalid - returned None")
-            exit_with_help(1)
-        elif type(predictions) != pd.DataFrame:
-            logging.error(
-                f"Pickle function is invalid - returned {type(predictions)} instead of pd.DataFrame"
-            )
-            exit_with_help(1)
-        elif len(predictions) == 0:
-            logging.error("Pickle function returned 0 predictions")
-            exit_with_help(1)
-        elif predictions.isna().any().any():
-            logging.error("Pickle function returned at least 1 NaN prediction")
-            exit_with_help(1)
-        elif not (predictions.iloc[:, 0].between(0, 1).all().all()):
-            logging.error(
-                "Pickle function returned invalid predictions. Ensure values are between 0 and 1."
-            )
-            exit_with_help(1)
+        predictions = []
+        for era, era_features in live_features.groupby("era"):
+            if num_eras > 1:
+                logging.debug(f"Predicting era {era} with {len(era_features)} rows")
+            if num_args == 1:
+                era_predictions = model(era_features)
+            elif num_args == 2:
+                era_benchmark_models = benchmark_models.loc[era_features.index]
+                era_predictions = model(era_features, era_benchmark_models)
+            else:
+                logging.error(
+                    f"Invalid pickle function - {model_pkl} must have 1 or 2 arguments"
+                )
+                exit_with_help(1)
+            if era_predictions is None:
+                logging.error("Pickle function is invalid - returned None")
+                exit_with_help(1)
+            elif type(era_predictions) != pd.DataFrame:
+                logging.error(
+                    f"Pickle function is invalid - returned {type(era_predictions)} instead of pd.DataFrame"
+                )
+                exit_with_help(1)
+            elif len(era_predictions) != len(era_features):
+                logging.error(
+                    f"Pickle function returned {len(era_predictions)} predictions, expected {len(era_features)}"
+                )
+                exit_with_help(1)
+            elif era_predictions.isna().any().any():
+                logging.error("Pickle function returned at least 1 NaN prediction")
+                exit_with_help(1)
+            elif not (era_predictions.iloc[:, 0].between(0, 1).all().all()):
+                logging.error(
+                    "Pickle function returned invalid predictions. Ensure values are between 0 and 1."
+                )
+                exit_with_help(1)
+            predictions.append(era_predictions)
+        predictions = pd.concat(predictions)
     except TypeError as e:
         logging.error(f"Pickle function is invalid - {e}")
         if args.debug:
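For context, a minimal sketch of a user pickle function that satisfies the per-era checks this diff introduces. Everything in it is illustrative rather than taken from the repo: the function name, the "feature" column prefix, and the ranking logic are assumptions; only the contract enforced above (a pd.DataFrame with one row per input row, no NaNs, values between 0 and 1, called once per era) comes from the diff.

    import pandas as pd

    def predict(live_features: pd.DataFrame) -> pd.DataFrame:
        # predict.py now calls this once per era, with only that era's rows.
        feature_cols = [c for c in live_features.columns if c.startswith("feature")]
        # Hypothetical scoring: sum the features, then percentile-rank within
        # the era so every value falls in (0, 1], passing the between(0, 1) check.
        raw = live_features[feature_cols].sum(axis=1)
        preds = raw.rank(pct=True)
        # Same length and index as the input, one column, no NaNs.
        return preds.to_frame("prediction")

A two-argument variant would accept (live_features, benchmark_models); the loop slices benchmark_models to the same per-era index before calling it. The new Makefile pattern rule exercises this path per Python version, e.g. make test_validation_3_10, and make test_validation runs all three.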