From d9201eff0453dc0b730c7937bdd01dd91519a0a2 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 14 Feb 2025 15:55:11 -0600 Subject: [PATCH 1/3] replacing `self._validate_data` with `validate_data` --- src/skmatter/_selection.py | 3 +-- src/skmatter/linear_model/_ridge.py | 2 +- src/skmatter/preprocessing/_data.py | 17 ++++++++++++----- src/skmatter/utils/_pcovr_utils.py | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 224c020a7..517a91166 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -209,8 +209,7 @@ def fit(self, X, y=None, warm_start=False): params = dict(ensure_min_samples=2, ensure_min_features=2, dtype=FLOAT_DTYPES) if hasattr(self, "mixing") or y is not None: - X, y = self._validate_data(X, y, **params) - X, y = validate_data(self, X, y, multi_output=True) + X, y = validate_data(self, X, y, multi_output=True, **params) if len(y.shape) == 1: # force y to have multi_output 2D format even when it's 1D, since diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py index 9dd5e1678..d16cce2ea 100644 --- a/src/skmatter/linear_model/_ridge.py +++ b/src/skmatter/linear_model/_ridge.py @@ -170,7 +170,7 @@ def fit(self, X, y): "[0,1)" ) - X, y = self._validate_data(X, y, y_numeric=True, multi_output=True) + X, y = validate_data(self, X, y, y_numeric=True, multi_output=True) self.n_samples_in_, self.n_features_in_ = X.shape # check_scoring uses estimators scoring function if the scorer is None, this is diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index ad329ac28..c11517882 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -1,7 +1,12 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.preprocessing._data import KernelCenterer -from sklearn.utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted +from sklearn.utils.validation import ( + FLOAT_DTYPES, + _check_sample_weight, + check_is_fitted, + validate_data, +) class StandardFlexibleScaler(TransformerMixin, BaseEstimator): @@ -128,7 +133,8 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted scaler. """ - X = self._validate_data( + X = validate_data( + self, X, copy=self.copy, estimator=self, @@ -181,7 +187,8 @@ def transform(self, X, y=None, copy=None): Transformed array. """ copy = copy if copy is not None else self.copy - X = self._validate_data( + X = validate_data( + self, X, reset=False, copy=copy, @@ -298,7 +305,7 @@ def fit(self, K, y=None, sample_weight=None): self : object Fitted transformer. """ - K = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False) + K = validate_data(self, K, copy=True, dtype=FLOAT_DTYPES, reset=False) if sample_weight is not None: self.sample_weight_ = _check_sample_weight(sample_weight, K, dtype=K.dtype) @@ -350,7 +357,7 @@ def transform(self, K, copy=True): Transformed array """ check_is_fitted(self) - K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False) + K = validate_data(self, K, copy=copy, dtype=FLOAT_DTYPES, reset=False) if self.with_center: K_pred_cols = np.average(K, weights=self.sample_weight_, axis=1)[ diff --git a/src/skmatter/utils/_pcovr_utils.py b/src/skmatter/utils/_pcovr_utils.py index 8852a6386..8531cd408 100644 --- a/src/skmatter/utils/_pcovr_utils.py +++ b/src/skmatter/utils/_pcovr_utils.py @@ -5,7 +5,7 @@ from sklearn.exceptions import NotFittedError from sklearn.metrics.pairwise import pairwise_kernels from sklearn.utils.extmath import randomized_svd -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted, validate_data def check_lr_fit(regressor, X, y): @@ -39,7 +39,7 @@ def check_lr_fit(regressor, X, y): fitted_regressor = deepcopy(regressor) # Check compatibility with X - fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + validate_data(fitted_regressor, X, y, reset=False, multi_output=True) # Check compatibility with y if fitted_regressor.coef_.ndim != y.ndim: @@ -103,7 +103,7 @@ def check_krr_fit(regressor, K, X, y): fitted_regressor = deepcopy(regressor) # Check compatibility with K - fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + validate_data(fitted_regressor, X, y, reset=False, multi_output=True) # Check compatibility with y if fitted_regressor.dual_coef_.ndim != y.ndim: From 75c2191d45781a9c6a7b13a33d7100fade85ffc4 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 14 Feb 2025 16:36:17 -0600 Subject: [PATCH 2/3] Additional fixes --- src/skmatter/decomposition/_kernel_pcovr.py | 4 ++-- src/skmatter/preprocessing/_data.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py index 825a0cf92..8e85c1885 100644 --- a/src/skmatter/decomposition/_kernel_pcovr.py +++ b/src/skmatter/decomposition/_kernel_pcovr.py @@ -12,7 +12,7 @@ from sklearn.utils import check_random_state from sklearn.utils._arpack import _init_arpack_v0 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip -from sklearn.utils.validation import check_is_fitted, validate_data +from sklearn.utils.validation import check_is_fitted, validate_data, _check_n_features from ..preprocessing import KernelNormalizer from ..utils import check_krr_fit, pcovr_kernel @@ -347,7 +347,7 @@ def fit(self, X, Y, W=None): except NotFittedError: self.regressor_.set_params(**regressor.get_params()) self.regressor_.X_fit_ = self.X_fit_ - self.regressor_._check_n_features(self.X_fit_, reset=True) + _check_n_features(self.regressor_, self.X_fit_, reset=True) else: Yhat = Y.copy() if W is None: diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index c11517882..35ed36828 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -398,7 +398,7 @@ def fit_transform(self, K, y=None, sample_weight=None, copy=True, **fit_params): return self.transform(K, copy) -class SparseKernelCenterer(TransformerMixin): +class SparseKernelCenterer(TransformerMixin, BaseEstimator): r"""Kernel centering method for sparse kernels, similar to :class:`KernelFlexibleCenterer`. From 45681d53c8c8c42d7bc84452ea7269b5dd370d8a Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 14 Feb 2025 17:17:04 -0600 Subject: [PATCH 3/3] Fixing scikit-learn warnings --- src/skmatter/_selection.py | 21 +++++++++++++++++---- src/skmatter/linear_model/_base.py | 4 ++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 517a91166..e73e1dc73 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -568,7 +568,10 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - validate_data(self, X, y, reset=False) # present for API consistency + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -743,7 +746,10 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - validate_data(self, X, y, reset=False) # present for API consistency + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -937,7 +943,10 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - validate_data(self, X, y, reset=False) + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) return self.hausdorff_ def get_distance(self): @@ -1100,7 +1109,11 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - validate_data(self, X, y, reset=False) + if y is not None: + validate_data(self, X, y.ravel(), reset=False) + else: + validate_data(self, X, reset=False) + return self.hausdorff_ def get_distance(self): diff --git a/src/skmatter/linear_model/_base.py b/src/skmatter/linear_model/_base.py index 7f91508eb..00a05c4dd 100644 --- a/src/skmatter/linear_model/_base.py +++ b/src/skmatter/linear_model/_base.py @@ -1,12 +1,12 @@ import numpy as np from scipy.linalg import orthogonal_procrustes -from sklearn.base import MultiOutputMixin, RegressorMixin +from sklearn.base import MultiOutputMixin, RegressorMixin, BaseEstimator from sklearn.linear_model import LinearRegression from sklearn.utils import check_array, check_X_y from sklearn.utils.validation import check_is_fitted -class OrthogonalRegression(MultiOutputMixin, RegressorMixin): +class OrthogonalRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): r"""Orthogonal regression by solving the Procrustes problem Linear regression with the additional constraint that the weight matrix