Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 106 additions & 76 deletions mapie_v1/integration_tests/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,114 +7,150 @@
from sklearn.compose import TransformedTargetRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import QuantileRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

from mapie.subsample import Subsample
from mapie._typing import ArrayLike
from mapie.conformity_scores import GammaConformityScore, \
AbsoluteConformityScore
AbsoluteConformityScore, ResidualNormalisedScore
from mapie_v1.regression import SplitConformalRegressor, \
CrossConformalRegressor, \
JackknifeAfterBootstrapRegressor, \
ConformalizedQuantileRegressor

from mapiev0.regression import MapieRegressor as MapieRegressorV0 # noqa
from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0 # noqa
from mapie_v1.conformity_scores._utils import \
check_and_select_regression_conformity_score
from mapie_v1.integration_tests.utils import (filter_params,
train_test_split_shuffle)
from sklearn.model_selection import LeaveOneOut, GroupKFold

RANDOM_STATE = 1
K_FOLDS = 3
N_BOOTSTRAPS = 30

N_SAMPLES = 200
N_GROUPS = 5

X, y_signed = make_regression(
n_samples=100,
n_samples=N_SAMPLES,
n_features=10,
noise=1.0,
random_state=RANDOM_STATE
)
y = np.abs(y_signed)
sample_weight = RandomState(RANDOM_STATE).random(len(X))
groups = [0] * 20 + [1] * 20 + [2] * 20 + [3] * 20 + [4] * 20
groups = [j for j in range(N_GROUPS) for i in range((N_SAMPLES//N_GROUPS))]
positive_predictor = TransformedTargetRegressor(
regressor=LinearRegression(),
func=lambda y_: np.log(y_ + 1),
inverse_func=lambda X_: np.exp(X_) - 1
)

X_split, y_split = make_regression(
n_samples=500,
n_features=10,
noise=1.0,
sample_weight_train = train_test_split(
X,
y,
sample_weight,
test_size=0.4,
random_state=RANDOM_STATE
)
)[-2]

params_test_cases_split = [
{
"v0": {
"alpha": 0.2,
"conformity_score": AbsoluteConformityScore(),
"cv": "split",
"test_size": 0.4,
"sample_weight": sample_weight,
"random_state": RANDOM_STATE,
},
"v1": {
"confidence_level": 0.8,
"conformity_score": "absolute",
"prefit": False,
"test_size": 0.4,
"fit_params": {"sample_weight": sample_weight_train},
"random_state": RANDOM_STATE,
}
},
{
"v0": {
"estimator": positive_predictor,
"test_size": 0.2,
"alpha": [0.5, 0.5],
"conformity_score": GammaConformityScore(),
"cv": "split",
"random_state": RANDOM_STATE,
},
"v1": {
"estimator": positive_predictor,
"test_size": 0.2,
"confidence_level": [0.5, 0.5],
"conformity_score": "gamma",
"prefit": False,
"random_state": RANDOM_STATE,
}
},
{
"v0": {
"estimator": LinearRegression(),
"alpha": 0.1,
"test_size": 0.2,
"conformity_score": ResidualNormalisedScore(
random_state=RANDOM_STATE
),
"cv": "prefit",
"allow_infinite_bounds": True,
"random_state": RANDOM_STATE,
},
"v1": {
"estimator": LinearRegression(),
"confidence_level": 0.9,
"prefit": True,
"test_size": 0.2,
"conformity_score": ResidualNormalisedScore(
random_state=RANDOM_STATE
),
"allow_infinite_bounds": True,
"random_state": RANDOM_STATE,
}
},
{
"v0": {
"estimator": positive_predictor,
"alpha": 0.1,
"conformity_score": GammaConformityScore(),
"cv": "split",
"random_state": RANDOM_STATE,
"test_size": 0.3,
"optimize_beta": True
},
"v1": {
"estimator": positive_predictor,
"confidence_level": 0.9,
"conformity_score": GammaConformityScore(),
"random_state": RANDOM_STATE,
"test_size": 0.3,
"minimize_interval_width": True
}
},
]


@pytest.mark.parametrize("cv", ["split", "prefit"])
@pytest.mark.parametrize("method", ["base", "plus", "minmax"])
@pytest.mark.parametrize("conformity_score", ["absolute"])
@pytest.mark.parametrize("confidence_level", [0.9, 0.95, 0.99])
@pytest.mark.parametrize("agg_function", ["mean", "median"])
@pytest.mark.parametrize("allow_infinite_bounds", [True, False])
@pytest.mark.parametrize(
"estimator", [
LinearRegression(),
RandomForestRegressor(random_state=RANDOM_STATE, max_depth=2)])
@pytest.mark.parametrize("test_size", [0.2, 0.5])
def test_intervals_and_predictions_exact_equality_split(
cv,
method,
conformity_score,
confidence_level,
agg_function,
allow_infinite_bounds,
estimator,
test_size
):
"""
Test that the prediction intervals are exactly the same
between v0 and v1 models when using the same settings.
"""
prefit = cv == "prefit"

v0_params = {
"estimator": estimator,
"method": method,
"conformity_score": check_and_select_regression_conformity_score(
conformity_score
),
"alpha": 1 - confidence_level,
"agg_function": agg_function,
"test_size": test_size,
"allow_infinite_bounds": allow_infinite_bounds,
"cv": cv,
"random_state": RANDOM_STATE,
}
v1_params = {
"estimator": estimator,
"method": method,
"conformity_score": conformity_score,
"confidence_level": confidence_level,
"aggregate_function": agg_function,
"random_state": RANDOM_STATE,
"n_bootstraps": N_BOOTSTRAPS,
"allow_infinite_bounds": allow_infinite_bounds,
"prefit": prefit,
"random_state": RANDOM_STATE,
}
@pytest.mark.parametrize("params_split", params_test_cases_split)
def test_intervals_and_predictions_exact_equality_split(params_split):
v0_params = params_split["v0"]
v1_params = params_split["v1"]

test_size = v1_params.get("test_size", None)
prefit = v1_params.get("prefit", False)

compare_model_predictions_and_intervals(
model_v0=MapieRegressorV0,
model_v1=SplitConformalRegressor,
X=X_split,
y=y_split,
X=X,
y=y,
v0_params=v0_params,
v1_params=v1_params,
test_size=test_size,
Expand Down Expand Up @@ -307,14 +343,6 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
)
gbr_models.append(estimator_)

sample_weight_train = train_test_split(
X,
y,
sample_weight,
test_size=0.4,
random_state=RANDOM_STATE
)[-2]

params_test_cases_quantile = [
{
"v0": {
Expand All @@ -336,6 +364,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
{
"v0": {
"estimator": gbr_models,
"alpha": gbr_alpha,
"cv": "prefit",
"method": "quantile",
"calib_size": 0.2,
Expand All @@ -345,6 +374,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
},
"v1": {
"estimator": gbr_models,
"confidence_level": 1-gbr_alpha,
"prefit": True,
"test_size": 0.2,
"fit_params": {"sample_weight": sample_weight},
Expand Down Expand Up @@ -396,8 +426,8 @@ def test_intervals_and_predictions_exact_equality_quantile(params_quantile):
v0_params = params_quantile["v0"]
v1_params = params_quantile["v1"]

test_size = v1_params["test_size"] if "test_size" in v1_params else None
prefit = ("prefit" in v1_params) and v1_params["prefit"]
test_size = v1_params.get("test_size", None)
prefit = v1_params.get("prefit", False)

compare_model_predictions_and_intervals(
model_v0=MapieQuantileRegressorV0,
Expand Down
Loading