diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 224c020a7..e73e1dc73 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -209,8 +209,7 @@ def fit(self, X, y=None, warm_start=False): params = dict(ensure_min_samples=2, ensure_min_features=2, dtype=FLOAT_DTYPES) if hasattr(self, "mixing") or y is not None: - X, y = self._validate_data(X, y, **params) - X, y = validate_data(self, X, y, multi_output=True) + X, y = validate_data(self, X, y, multi_output=True, **params) if len(y.shape) == 1: # force y to have multi_output 2D format even when it's 1D, since @@ -569,7 +568,10 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - validate_data(self, X, y, reset=False) # present for API consistency + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -744,7 +746,10 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - validate_data(self, X, y, reset=False) # present for API consistency + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -938,7 +943,10 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - validate_data(self, X, y, reset=False) + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) return self.hausdorff_ def get_distance(self): @@ -1101,7 +1109,11 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - validate_data(self, X, y, reset=False) + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) + return self.hausdorff_ def get_distance(self): diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py index 825a0cf92..8e85c1885 100644 --- a/src/skmatter/decomposition/_kernel_pcovr.py +++ b/src/skmatter/decomposition/_kernel_pcovr.py @@ -12,7 +12,7 @@ from sklearn.utils import check_random_state from sklearn.utils._arpack import _init_arpack_v0 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip -from sklearn.utils.validation import check_is_fitted, validate_data +from sklearn.utils.validation import check_is_fitted, validate_data, _check_n_features from ..preprocessing import KernelNormalizer from ..utils import check_krr_fit, pcovr_kernel @@ -347,7 +347,7 @@ def fit(self, X, Y, W=None): except NotFittedError: self.regressor_.set_params(**regressor.get_params()) self.regressor_.X_fit_ = self.X_fit_ - self.regressor_._check_n_features(self.X_fit_, reset=True) + _check_n_features(self.regressor_, self.X_fit_, reset=True) else: Yhat = Y.copy() if W is None: diff --git a/src/skmatter/linear_model/_base.py b/src/skmatter/linear_model/_base.py index 7f91508eb..00a05c4dd 100644 --- a/src/skmatter/linear_model/_base.py +++ b/src/skmatter/linear_model/_base.py @@ -1,12 +1,12 @@ import numpy as np from scipy.linalg import orthogonal_procrustes -from sklearn.base import MultiOutputMixin, RegressorMixin +from sklearn.base import MultiOutputMixin, RegressorMixin, BaseEstimator from sklearn.linear_model import LinearRegression from sklearn.utils import check_array, check_X_y from sklearn.utils.validation import check_is_fitted -class OrthogonalRegression(MultiOutputMixin, RegressorMixin): +class OrthogonalRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): r"""Orthogonal regression by solving the Procrustes problem Linear regression with the additional constraint that the weight matrix diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py index 9dd5e1678..d16cce2ea 100644 --- a/src/skmatter/linear_model/_ridge.py +++ b/src/skmatter/linear_model/_ridge.py @@ -170,7 +170,7 @@ def fit(self, X, y): "[0,1)" ) - X, y = self._validate_data(X, y, y_numeric=True, multi_output=True) + X, y = validate_data(self, X, y, y_numeric=True, multi_output=True) self.n_samples_in_, self.n_features_in_ = X.shape # check_scoring uses estimators scoring function if the scorer is None, this is diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index ad329ac28..35ed36828 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -1,7 +1,12 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.preprocessing._data import KernelCenterer -from sklearn.utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted +from sklearn.utils.validation import ( + FLOAT_DTYPES, + _check_sample_weight, + check_is_fitted, + validate_data, +) class StandardFlexibleScaler(TransformerMixin, BaseEstimator): @@ -128,7 +133,8 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted scaler. """ - X = self._validate_data( + X = validate_data( + self, X, copy=self.copy, estimator=self, @@ -181,7 +187,8 @@ def transform(self, X, y=None, copy=None): Transformed array. """ copy = copy if copy is not None else self.copy - X = self._validate_data( + X = validate_data( + self, X, reset=False, copy=copy, @@ -298,7 +305,7 @@ def fit(self, K, y=None, sample_weight=None): self : object Fitted transformer. """ - K = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False) + K = validate_data(self, K, copy=True, dtype=FLOAT_DTYPES, reset=False) if sample_weight is not None: self.sample_weight_ = _check_sample_weight(sample_weight, K, dtype=K.dtype) @@ -350,7 +357,7 @@ def transform(self, K, copy=True): Transformed array """ check_is_fitted(self) - K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False) + K = validate_data(self, K, copy=copy, dtype=FLOAT_DTYPES, reset=False) if self.with_center: K_pred_cols = np.average(K, weights=self.sample_weight_, axis=1)[ @@ -391,7 +398,7 @@ def fit_transform(self, K, y=None, sample_weight=None, copy=True, **fit_params): return self.transform(K, copy) -class SparseKernelCenterer(TransformerMixin): +class SparseKernelCenterer(TransformerMixin, BaseEstimator): r"""Kernel centering method for sparse kernels, similar to :class:`KernelFlexibleCenterer`. diff --git a/src/skmatter/utils/_pcovr_utils.py b/src/skmatter/utils/_pcovr_utils.py index 8852a6386..8531cd408 100644 --- a/src/skmatter/utils/_pcovr_utils.py +++ b/src/skmatter/utils/_pcovr_utils.py @@ -5,7 +5,7 @@ from sklearn.exceptions import NotFittedError from sklearn.metrics.pairwise import pairwise_kernels from sklearn.utils.extmath import randomized_svd -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted, validate_data def check_lr_fit(regressor, X, y): @@ -39,7 +39,7 @@ def check_lr_fit(regressor, X, y): fitted_regressor = deepcopy(regressor) # Check compatibility with X - fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + validate_data(fitted_regressor, X, y, reset=False, multi_output=True) # Check compatibility with y if fitted_regressor.coef_.ndim != y.ndim: @@ -103,7 +103,7 @@ def check_krr_fit(regressor, K, X, y): fitted_regressor = deepcopy(regressor) # Check compatibility with K - fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + validate_data(fitted_regressor, X, y, reset=False, multi_output=True) # Check compatibility with y if fitted_regressor.dual_coef_.ndim != y.ndim: