2
2
from joblib import Parallel , delayed
3
3
from sklearn .base import BaseEstimator , MultiOutputMixin , RegressorMixin
4
4
from sklearn .metrics import check_scoring
5
- from sklearn .model_selection import KFold
5
+ from sklearn .model_selection import KFold , check_cv
6
6
from sklearn .utils import check_array
7
7
from sklearn .utils .validation import check_is_fitted
8
8
@@ -75,14 +75,20 @@ class RidgeRegression2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
75
75
parameter in e.g. :obj:`numpy.linalg.lstsq`. Be aware that for every case
76
76
we always apply a small default cutoff dependent on the numerical
77
77
accuracy of the data type of ``X`` in the fitting function.
78
+ cv : cross-validation generator or an iterable, default=None
79
+ The first split yielded by the generator is used to determine the two folds.
80
+ If None, a 0.5 split into two folds is made using the arguments
81
+ :param shuffle: and :param random_state:
78
82
shuffle : bool, default=True
79
83
Whether or not to shuffle the data before splitting.
84
+ If :param cv: is not None, this parameter is ignored.
80
85
random_state : int or RandomState instance, default=None
81
86
Controls the shuffling applied to the data before applying the split.
82
87
Pass an int for reproducible output across multiple function calls.
83
88
See
84
89
`random_state glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-random-state>`_
85
90
parameter is ignored.
91
+ If :param cv: is not None, this parameter is ignored.
86
92
scoring : str, callable, default=None
87
93
A string (see model evaluation documentation) or
88
94
a scorer callable object / function with signature
@@ -115,6 +121,7 @@ def __init__(
115
121
alphas = (0.1 , 1.0 , 10.0 ),
116
122
alpha_type = "absolute" ,
117
123
regularization_method = "tikhonov" ,
124
+ cv = None ,
118
125
scoring = None ,
119
126
random_state = None ,
120
127
shuffle = True ,
@@ -123,6 +130,7 @@ def __init__(
123
130
self .alphas = np .asarray (alphas )
124
131
self .alpha_type = alpha_type
125
132
self .regularization_method = regularization_method
133
+ self .cv = cv
126
134
self .scoring = scoring
127
135
self .random_state = random_state
128
136
self .shuffle = shuffle
@@ -171,11 +179,12 @@ def fit(self, X, y):
171
179
else :
172
180
scorer = check_scoring (self , scoring = self .scoring , allow_none = False )
173
181
174
- fold1_idx , fold2_idx = next (
175
- KFold (
176
- n_splits = 2 , shuffle = self .shuffle , random_state = self .random_state
177
- ).split (X )
178
- )
182
+ if self .cv is None :
183
+ cv = KFold (n_splits = 2 , shuffle = self .shuffle , random_state = self .random_state )
184
+ else :
185
+ cv = check_cv (self .cv )
186
+
187
+ fold1_idx , fold2_idx = next (cv .split (X ))
179
188
self .coef_ = self ._2fold_cv (X , y , fold1_idx , fold2_idx , scorer )
180
189
return self
181
190
0 commit comments