 from sklearn.compose import TransformedTargetRegressor
 from sklearn.datasets import make_regression
 from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import QuantileRegressor
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.model_selection import train_test_split

 from mapie.subsample import Subsample
 from mapie._typing import ArrayLike
 from mapie.conformity_scores import GammaConformityScore, \
-    AbsoluteConformityScore
+    AbsoluteConformityScore, ResidualNormalisedScore
 from mapie_v1.regression import SplitConformalRegressor, \
     CrossConformalRegressor, \
     JackknifeAfterBootstrapRegressor, \
     ConformalizedQuantileRegressor

 from mapiev0.regression import MapieRegressor as MapieRegressorV0  # noqa
 from mapiev0.regression import MapieQuantileRegressor as MapieQuantileRegressorV0  # noqa
-from mapie_v1.conformity_scores._utils import \
-    check_and_select_regression_conformity_score
 from mapie_v1.integration_tests.utils import (filter_params,
                                               train_test_split_shuffle)
 from sklearn.model_selection import LeaveOneOut, GroupKFold

 RANDOM_STATE = 1
 K_FOLDS = 3
 N_BOOTSTRAPS = 30
-
+N_SAMPLES = 200
+N_GROUPS = 5

 X, y_signed = make_regression(
-    n_samples=100,
+    n_samples=N_SAMPLES,
     n_features=10,
     noise=1.0,
     random_state=RANDOM_STATE
 )
 y = np.abs(y_signed)
 sample_weight = RandomState(RANDOM_STATE).random(len(X))
-groups = [0] * 20 + [1] * 20 + [2] * 20 + [3] * 20 + [4] * 20
+groups = [j for j in range(N_GROUPS) for i in range((N_SAMPLES // N_GROUPS))]
 positive_predictor = TransformedTargetRegressor(
     regressor=LinearRegression(),
     func=lambda y_: np.log(y_ + 1),
     inverse_func=lambda X_: np.exp(X_) - 1
 )

-X_split, y_split = make_regression(
-    n_samples=500,
-    n_features=10,
-    noise=1.0,
+sample_weight_train = train_test_split(
+    X,
+    y,
+    sample_weight,
+    test_size=0.4,
     random_state=RANDOM_STATE
-)
+)[-2]
+
+params_test_cases_split = [
+    {
+        "v0": {
+            "alpha": 0.2,
+            "conformity_score": AbsoluteConformityScore(),
+            "cv": "split",
+            "test_size": 0.4,
+            "sample_weight": sample_weight,
+            "random_state": RANDOM_STATE,
+        },
+        "v1": {
+            "confidence_level": 0.8,
+            "conformity_score": "absolute",
+            "prefit": False,
+            "test_size": 0.4,
+            "fit_params": {"sample_weight": sample_weight_train},
+            "random_state": RANDOM_STATE,
+        }
+    },
+    {
+        "v0": {
+            "estimator": positive_predictor,
+            "test_size": 0.2,
+            "alpha": [0.5, 0.5],
+            "conformity_score": GammaConformityScore(),
+            "cv": "split",
+            "random_state": RANDOM_STATE,
+        },
+        "v1": {
+            "estimator": positive_predictor,
+            "test_size": 0.2,
+            "confidence_level": [0.5, 0.5],
+            "conformity_score": "gamma",
+            "prefit": False,
+            "random_state": RANDOM_STATE,
+        }
+    },
+    {
+        "v0": {
+            "estimator": LinearRegression(),
+            "alpha": 0.1,
+            "test_size": 0.2,
+            "conformity_score": ResidualNormalisedScore(
+                random_state=RANDOM_STATE
+            ),
+            "cv": "prefit",
+            "allow_infinite_bounds": True,
+            "random_state": RANDOM_STATE,
+        },
+        "v1": {
+            "estimator": LinearRegression(),
+            "confidence_level": 0.9,
+            "prefit": True,
+            "test_size": 0.2,
+            "conformity_score": ResidualNormalisedScore(
+                random_state=RANDOM_STATE
+            ),
+            "allow_infinite_bounds": True,
+            "random_state": RANDOM_STATE,
+        }
+    },
+    {
+        "v0": {
+            "estimator": positive_predictor,
+            "alpha": 0.1,
+            "conformity_score": GammaConformityScore(),
+            "cv": "split",
+            "random_state": RANDOM_STATE,
+            "test_size": 0.3,
+            "optimize_beta": True
+        },
+        "v1": {
+            "estimator": positive_predictor,
+            "confidence_level": 0.9,
+            "conformity_score": GammaConformityScore(),
+            "random_state": RANDOM_STATE,
+            "test_size": 0.3,
+            "minimize_interval_width": True
+        }
+    },
+]


-@pytest.mark.parametrize("cv", ["split", "prefit"])
-@pytest.mark.parametrize("method", ["base", "plus", "minmax"])
-@pytest.mark.parametrize("conformity_score", ["absolute"])
-@pytest.mark.parametrize("confidence_level", [0.9, 0.95, 0.99])
-@pytest.mark.parametrize("agg_function", ["mean", "median"])
-@pytest.mark.parametrize("allow_infinite_bounds", [True, False])
-@pytest.mark.parametrize(
-    "estimator", [
-        LinearRegression(),
-        RandomForestRegressor(random_state=RANDOM_STATE, max_depth=2)])
-@pytest.mark.parametrize("test_size", [0.2, 0.5])
-def test_intervals_and_predictions_exact_equality_split(
-    cv,
-    method,
-    conformity_score,
-    confidence_level,
-    agg_function,
-    allow_infinite_bounds,
-    estimator,
-    test_size
-):
-    """
-    Test that the prediction intervals are exactly the same
-    between v0 and v1 models when using the same settings.
-    """
-    prefit = cv == "prefit"
-
-    v0_params = {
-        "estimator": estimator,
-        "method": method,
-        "conformity_score": check_and_select_regression_conformity_score(
-            conformity_score
-        ),
-        "alpha": 1 - confidence_level,
-        "agg_function": agg_function,
-        "test_size": test_size,
-        "allow_infinite_bounds": allow_infinite_bounds,
-        "cv": cv,
-        "random_state": RANDOM_STATE,
-    }
-    v1_params = {
-        "estimator": estimator,
-        "method": method,
-        "conformity_score": conformity_score,
-        "confidence_level": confidence_level,
-        "aggregate_function": agg_function,
-        "random_state": RANDOM_STATE,
-        "n_bootstraps": N_BOOTSTRAPS,
-        "allow_infinite_bounds": allow_infinite_bounds,
-        "prefit": prefit,
-        "random_state": RANDOM_STATE,
-    }
+@pytest.mark.parametrize("params_split", params_test_cases_split)
+def test_intervals_and_predictions_exact_equality_split(params_split):
+    v0_params = params_split["v0"]
+    v1_params = params_split["v1"]
+
+    test_size = v1_params.get("test_size", None)
+    prefit = v1_params.get("prefit", False)

     compare_model_predictions_and_intervals(
         model_v0=MapieRegressorV0,
         model_v1=SplitConformalRegressor,
-        X=X_split,
-        y=y_split,
+        X=X,
+        y=y,
         v0_params=v0_params,
         v1_params=v1_params,
         test_size=test_size,
@@ -307,14 +343,6 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
     )
     gbr_models.append(estimator_)

-sample_weight_train = train_test_split(
-    X,
-    y,
-    sample_weight,
-    test_size=0.4,
-    random_state=RANDOM_STATE
-)[-2]
-
 params_test_cases_quantile = [
     {
         "v0": {
@@ -336,6 +364,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
     {
         "v0": {
             "estimator": gbr_models,
+            "alpha": gbr_alpha,
             "cv": "prefit",
             "method": "quantile",
             "calib_size": 0.2,
@@ -345,6 +374,7 @@ def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
         },
         "v1": {
             "estimator": gbr_models,
+            "confidence_level": 1 - gbr_alpha,
             "prefit": True,
             "test_size": 0.2,
             "fit_params": {"sample_weight": sample_weight},
@@ -396,8 +426,8 @@ def test_intervals_and_predictions_exact_equality_quantile(params_quantile):
     v0_params = params_quantile["v0"]
     v1_params = params_quantile["v1"]

-    test_size = v1_params["test_size"] if "test_size" in v1_params else None
-    prefit = ("prefit" in v1_params) and v1_params["prefit"]
+    test_size = v1_params.get("test_size", None)
+    prefit = v1_params.get("prefit", False)

     compare_model_predictions_and_intervals(
         model_v0=MapieQuantileRegressorV0,