Skip to content

Commit 42db582

Browse files
jawadhussein462 and Valentin-Laurent
authored and committed
TEST: Improve v1 integration test for SplitConformalRegressor (#580)
TEST: Improve v1 integration test for SplitConformalRegressor (#580)
1 parent c88bd51 commit 42db582

File tree

1 file changed

+106
-76
lines changed

1 file changed

+106
-76
lines changed

mapie_v1/integration_tests/tests/test_regression.py

Lines changed: 106 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -7,114 +7,150 @@
77
from sklearn.compose import TransformedTargetRegressor
88
from sklearn.datasets import make_regression
99
from sklearn.linear_model import LinearRegression
10-
from sklearn.ensemble import RandomForestRegressor
1110
from sklearn.linear_model import QuantileRegressor
1211
from sklearn.ensemble import GradientBoostingRegressor
1312
from sklearn.model_selection import train_test_split
1413

1514
from mapie.subsample import Subsample
1615
from mapie._typing import ArrayLike
1716
from mapie.conformity_scores import GammaConformityScore, \
18-
AbsoluteConformityScore
17+
AbsoluteConformityScore, ResidualNormalisedScore
1918
from mapie_v1.regression import SplitConformalRegressor, \
2019
CrossConformalRegressor, \
2120
JackknifeAfterBootstrapRegressor, \
2221
ConformalizedQuantileRegressor
2322

2423
from mapiev0.regression import MapieRegressor as MapieRegressorV0 # noqa
2524
from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0 # noqa
26-
from mapie_v1.conformity_scores._utils import \
27-
check_and_select_regression_conformity_score
2825
from mapie_v1.integration_tests.utils import (filter_params,
2926
train_test_split_shuffle)
3027
from sklearn.model_selection import LeaveOneOut, GroupKFold
3128

3229
RANDOM_STATE = 1
3330
K_FOLDS = 3
3431
N_BOOTSTRAPS = 30
35-
32+
N_SAMPLES = 200
33+
N_GROUPS = 5
3634

3735
X, y_signed = make_regression(
38-
n_samples=100,
36+
n_samples=N_SAMPLES,
3937
n_features=10,
4038
noise=1.0,
4139
random_state=RANDOM_STATE
4240
)
4341
y = np.abs(y_signed)
4442
sample_weight = RandomState(RANDOM_STATE).random(len(X))
45-
groups = [0] * 20 + [1] * 20 + [2] * 20 + [3] * 20 + [4] * 20
43+
groups = [j for j in range(N_GROUPS) for i in range((N_SAMPLES//N_GROUPS))]
4644
positive_predictor = TransformedTargetRegressor(
4745
regressor=LinearRegression(),
4846
func=lambda y_: np.log(y_ + 1),
4947
inverse_func=lambda X_: np.exp(X_) - 1
5048
)
5149

52-
X_split, y_split = make_regression(
53-
n_samples=500,
54-
n_features=10,
55-
noise=1.0,
50+
sample_weight_train = train_test_split(
51+
X,
52+
y,
53+
sample_weight,
54+
test_size=0.4,
5655
random_state=RANDOM_STATE
57-
)
56+
)[-2]
57+
58+
params_test_cases_split = [
59+
{
60+
"v0": {
61+
"alpha": 0.2,
62+
"conformity_score": AbsoluteConformityScore(),
63+
"cv": "split",
64+
"test_size": 0.4,
65+
"sample_weight": sample_weight,
66+
"random_state": RANDOM_STATE,
67+
},
68+
"v1": {
69+
"confidence_level": 0.8,
70+
"conformity_score": "absolute",
71+
"prefit": False,
72+
"test_size": 0.4,
73+
"fit_params": {"sample_weight": sample_weight_train},
74+
"random_state": RANDOM_STATE,
75+
}
76+
},
77+
{
78+
"v0": {
79+
"estimator": positive_predictor,
80+
"test_size": 0.2,
81+
"alpha": [0.5, 0.5],
82+
"conformity_score": GammaConformityScore(),
83+
"cv": "split",
84+
"random_state": RANDOM_STATE,
85+
},
86+
"v1": {
87+
"estimator": positive_predictor,
88+
"test_size": 0.2,
89+
"confidence_level": [0.5, 0.5],
90+
"conformity_score": "gamma",
91+
"prefit": False,
92+
"random_state": RANDOM_STATE,
93+
}
94+
},
95+
{
96+
"v0": {
97+
"estimator": LinearRegression(),
98+
"alpha": 0.1,
99+
"test_size": 0.2,
100+
"conformity_score": ResidualNormalisedScore(
101+
random_state=RANDOM_STATE
102+
),
103+
"cv": "prefit",
104+
"allow_infinite_bounds": True,
105+
"random_state": RANDOM_STATE,
106+
},
107+
"v1": {
108+
"estimator": LinearRegression(),
109+
"confidence_level": 0.9,
110+
"prefit": True,
111+
"test_size": 0.2,
112+
"conformity_score": ResidualNormalisedScore(
113+
random_state=RANDOM_STATE
114+
),
115+
"allow_infinite_bounds": True,
116+
"random_state": RANDOM_STATE,
117+
}
118+
},
119+
{
120+
"v0": {
121+
"estimator": positive_predictor,
122+
"alpha": 0.1,
123+
"conformity_score": GammaConformityScore(),
124+
"cv": "split",
125+
"random_state": RANDOM_STATE,
126+
"test_size": 0.3,
127+
"optimize_beta": True
128+
},
129+
"v1": {
130+
"estimator": positive_predictor,
131+
"confidence_level": 0.9,
132+
"conformity_score": GammaConformityScore(),
133+
"random_state": RANDOM_STATE,
134+
"test_size": 0.3,
135+
"minimize_interval_width": True
136+
}
137+
},
138+
]
58139

59140

60-
@pytest.mark.parametrize("cv", ["split", "prefit"])
61-
@pytest.mark.parametrize("method", ["base", "plus", "minmax"])
62-
@pytest.mark.parametrize("conformity_score", ["absolute"])
63-
@pytest.mark.parametrize("confidence_level", [0.9, 0.95, 0.99])
64-
@pytest.mark.parametrize("agg_function", ["mean", "median"])
65-
@pytest.mark.parametrize("allow_infinite_bounds", [True, False])
66-
@pytest.mark.parametrize(
67-
"estimator", [
68-
LinearRegression(),
69-
RandomForestRegressor(random_state=RANDOM_STATE, max_depth=2)])
70-
@pytest.mark.parametrize("test_size", [0.2, 0.5])
71-
def test_intervals_and_predictions_exact_equality_split(
72-
cv,
73-
method,
74-
conformity_score,
75-
confidence_level,
76-
agg_function,
77-
allow_infinite_bounds,
78-
estimator,
79-
test_size
80-
):
81-
"""
82-
Test that the prediction intervals are exactly the same
83-
between v0 and v1 models when using the same settings.
84-
"""
85-
prefit = cv == "prefit"
86-
87-
v0_params = {
88-
"estimator": estimator,
89-
"method": method,
90-
"conformity_score": check_and_select_regression_conformity_score(
91-
conformity_score
92-
),
93-
"alpha": 1 - confidence_level,
94-
"agg_function": agg_function,
95-
"test_size": test_size,
96-
"allow_infinite_bounds": allow_infinite_bounds,
97-
"cv": cv,
98-
"random_state": RANDOM_STATE,
99-
}
100-
v1_params = {
101-
"estimator": estimator,
102-
"method": method,
103-
"conformity_score": conformity_score,
104-
"confidence_level": confidence_level,
105-
"aggregate_function": agg_function,
106-
"random_state": RANDOM_STATE,
107-
"n_bootstraps": N_BOOTSTRAPS,
108-
"allow_infinite_bounds": allow_infinite_bounds,
109-
"prefit": prefit,
110-
"random_state": RANDOM_STATE,
111-
}
141+
@pytest.mark.parametrize("params_split", params_test_cases_split)
142+
def test_intervals_and_predictions_exact_equality_split(params_split):
143+
v0_params = params_split["v0"]
144+
v1_params = params_split["v1"]
145+
146+
test_size = v1_params.get("test_size", None)
147+
prefit = v1_params.get("prefit", False)
112148

113149
compare_model_predictions_and_intervals(
114150
model_v0=MapieRegressorV0,
115151
model_v1=SplitConformalRegressor,
116-
X=X_split,
117-
y=y_split,
152+
X=X,
153+
y=y,
118154
v0_params=v0_params,
119155
v1_params=v1_params,
120156
test_size=test_size,
@@ -307,14 +343,6 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
307343
)
308344
gbr_models.append(estimator_)
309345

310-
sample_weight_train = train_test_split(
311-
X,
312-
y,
313-
sample_weight,
314-
test_size=0.4,
315-
random_state=RANDOM_STATE
316-
)[-2]
317-
318346
params_test_cases_quantile = [
319347
{
320348
"v0": {
@@ -336,6 +364,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
336364
{
337365
"v0": {
338366
"estimator": gbr_models,
367+
"alpha": gbr_alpha,
339368
"cv": "prefit",
340369
"method": "quantile",
341370
"calib_size": 0.2,
@@ -345,6 +374,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
345374
},
346375
"v1": {
347376
"estimator": gbr_models,
377+
"confidence_level": 1-gbr_alpha,
348378
"prefit": True,
349379
"test_size": 0.2,
350380
"fit_params": {"sample_weight": sample_weight},
@@ -396,8 +426,8 @@ def test_intervals_and_predictions_exact_equality_quantile(params_quantile):
396426
v0_params = params_quantile["v0"]
397427
v1_params = params_quantile["v1"]
398428

399-
test_size = v1_params["test_size"] if "test_size" in v1_params else None
400-
prefit = ("prefit" in v1_params) and v1_params["prefit"]
429+
test_size = v1_params.get("test_size", None)
430+
prefit = v1_params.get("prefit", False)
401431

402432
compare_model_predictions_and_intervals(
403433
model_v0=MapieQuantileRegressorV0,

0 commit comments

Comments
 (0)