diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1d3951238..9567edaf6 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,4 +28,4 @@ Please describe the tests that you ran to verify your changes. Provide instructi - [ ] Typing passes successfully : `make type-check` - [ ] Unit tests pass successfully : `make tests` - [ ] Coverage is 100% : `make coverage` -- [ ] Documentation builds successfully : `make doc` \ No newline at end of file +- [ ] Documentation builds successfully and without warnings : `make doc` \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 1a1083857..81b04b707 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -48,7 +48,7 @@ Finally, install `mapie` in development mode: Documenting your change ----------------------- -If you're adding a class or a function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention <https://numpydoc.readthedocs.io/en/latest/format.html>`_, so please do too. +If you're adding a public class or function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention <https://numpydoc.readthedocs.io/en/latest/format.html>`_, so please do too. Any estimator should follow the `scikit-learn API <https://scikit-learn.org/stable/developers/develop.html>`_, so please follow these guidelines. In order to build the documentation locally, you first need to install some dependencies: diff --git a/HISTORY.rst b/HISTORY.rst index 916c81546..5a896877a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -8,6 +8,7 @@ History * Fix issue 525 in contribution guidelines with syntax errors in hyperlinks and other formatting issues. 
* Bump wheel version to avoid known security vulnerabilities * Fix issue 495 to center correctly the prediction intervals +* Fix most documentation build warnings 0.9.1 (2024-09-13) ------------------ diff --git a/doc/conf.py b/doc/conf.py index b56a02a87..0b1af45f2 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -67,6 +67,9 @@ # generate autosummary even if no references autosummary_generate = True + +autosectionlabel_prefix_document = True + # The suffix of source filenames. source_suffix = ".rst" diff --git a/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py b/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py index 903b23702..6a58fe0fe 100644 --- a/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py +++ b/examples/mondrian/1-quickstart/plot_main-tutorial-mondrian-regression.py @@ -36,7 +36,7 @@ ############################################################################## # 1. Create the noisy dataset -# ----------------------------- +# ---------------------------------------------------------------------------- # We create a dataset with 10 groups, each of those groups having a different # level of noise. @@ -87,7 +87,7 @@ ############################################################################## # 2. Split the dataset into a training set, a calibration set, and a test set. -# ----------------------------- +# ---------------------------------------------------------------------------- X_train_temp, X_test, y_train_temp, y_test = train_test_split( X, y, test_size=0.2, random_state=0 @@ -119,7 +119,7 @@ ############################################################################## # 3. Fit a random forest regressor on the training set. 
-# ----------------------------- +# ---------------------------------------------------------------------------- rf = RandomForestRegressor(n_estimators=100) rf.fit(X_train, y_train) @@ -127,7 +127,7 @@ ############################################################################## # 4. Fit a MapieRegressor and a MondrianCP on the calibration set. -# ----------------------------- +# ---------------------------------------------------------------------------- mapie_regressor = MapieRegressor(rf, cv="prefit") mondrian_regressor = MondrianCP(MapieRegressor(rf, cv="prefit")) @@ -137,7 +137,7 @@ ############################################################################## # 5. Predict the prediction intervals on the test set with both methods. -# ----------------------------- +# ---------------------------------------------------------------------------- _, y_pss_split = mapie_regressor.predict(X_test, alpha=.1) _, y_pss_mondrian = mondrian_regressor.predict( @@ -147,7 +147,7 @@ ############################################################################## # 6. Compare the coverage by partition, plot both methods side by side. -# ----------------------------- +# ---------------------------------------------------------------------------- coverages = {} for group in np.unique(partition_test): diff --git a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py index af4d572e9..096c184c9 100644 --- a/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py +++ b/examples/multilabel_classification/1-quickstart/plot_tutorial_multilabel_classification.py @@ -27,7 +27,7 @@ ############################################################################## # 1. 
Construction of the dataset -# ----------------------------- +# ---------------------------------------------------------------------------- # We use a two-dimensional toy dataset with three possible labels. The idea # is to create a triangle where the observations on the edges have only one # label, those on the vertices have two labels (those of the two edges) and the @@ -94,21 +94,21 @@ ############################################################################## # 2 Recall control risk with CRC and RCPS -# --------------------------------------- +# ---------------------------------------------------------------------------- # 2.1 Fitting MapieMultiLabelClassifier -# ------------------------------------ +# ---------------------------------------------------------------------------- # MapieMultiLabelClassifier will be fitted with RCPS and CRC methods. For the # RCPS method, we will test all three Upper Confidence Bounds (Hoeffding, # Bernstein and Waudby-Smith–Ramdas). # The two methods give two different guarantees on the risk: # # * RCPS: :math:`P(R(\mathcal{T}_{\hat{\lambda}})\leq\alpha)\geq 1-\delta` -# where :math:`R(\mathcal{T}_{\hat{\lambda}})` -# is the risk we want to control and :math:`\alpha` is the desired risk +# where :math:`R(\mathcal{T}_{\hat{\lambda}})` +# is the risk we want to control and :math:`\alpha` is the desired risk # # * CRC: :math:`\mathbb{E}\left[L_{n+1}(\hat{\lambda})\right] \leq \alpha` -# where :math:`L_{n+1}(\hat{\lambda})` is the risk of a new observation and -# :math:`\alpha` is the desired risk +# where :math:`L_{n+1}(\hat{\lambda})` is the risk of a new observation and +# :math:`\alpha` is the desired risk # # In both cases, the objective of the method is to find the optimal value of # :math:`\lambda` (threshold above which we consider a label as being present) @@ -148,17 +148,17 @@ ############################################################################## # 2.2. 
Results -# ---------- +# ---------------------------------------------------------------------------- # To check the results of the methods, we propose two types of plots: # -# * Plots where the confidence level varies. Here two metrics are plotted -# for each method and for each UCB -# * The actual recall (which should be always near to the required one): -# we can see that they are close to each other. -# * The value of the threshold: we see that the threshold is decreasing as -# :math:`1 - \alpha` increases, which is what is expected because a -# smaller threshold will give larger prediction sets, hence a larger -# recall. +# 1 - Plots where the confidence level varies. Here two metrics are plotted +# for each method and for each UCB +# * The actual recall (which should be always near to the required one): +# we can see that they are close to each other. +# * The value of the threshold: we see that the threshold is decreasing as +# :math:`1 - \alpha` increases, which is what is expected because a +# smaller threshold will give larger prediction sets, hence a larger +# recall. # vars_y = [recalls, thresholds] @@ -177,15 +177,15 @@ plt.show() ############################################################################## -# * Plots where we choose a specific risk value (0.1 in our case) and look at -# the average risk, the UCB of the risk (for RCPS methods) and the choice of -# the threshold :math:`\lambda` -# * We can see that among the RCPS methods, the Bernstein method -# gives the best results as for a given value of :math:`\alpha` -# as we are above the required recall but with a larger value of -# :math:`\lambda` than the two others bounds. -# * The CRC method gives the best results since it guarantees the coverage -# with a larger threshold. 
+# 2 - Plots where we choose a specific risk value (0.1 in our case) and look at +# the average risk, the UCB of the risk (for RCPS methods) and the choice of +# the threshold :math:`\lambda` +# * We can see that among the RCPS methods, the Bernstein method +# gives the best results as for a given value of :math:`\alpha` +# as we are above the required recall but with a larger value of +# :math:`\lambda` than the two others bounds. +# * The CRC method gives the best results since it guarantees the coverage +# with a larger threshold. fig, axs = plt.subplots( 1, @@ -216,9 +216,9 @@ ############################################################################## # 3. Precision control risk with LTT -# ------------------ +# ---------------------------------------------------------------------------- # 3.1 Fitting MapieMultilabelClassifier -# ------------------------------------- +# ---------------------------------------------------------------------------- # # In this part, we will use LTT to control precision. # At the opposite of the 2 previous method, LTT can handle non-monotonous loss. @@ -266,7 +266,7 @@ ############################################################################## # 3.2 Valid parameters for precision control -# ------------------------------------------ +# ---------------------------------------------------------------------------- # We can see that not all :math:`\lambda` such that risk is below the orange # line are choosen by the procedure. 
Otherwise, all the lambdas that are # in the red rectangle verify family wise error rate control and allow to diff --git a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py index df08059f4..655df767f 100644 --- a/examples/regression/2-advanced-analysis/plot_conditional_coverage.py +++ b/examples/regression/2-advanced-analysis/plot_conditional_coverage.py @@ -171,15 +171,15 @@ def sin_with_controlled_noise( # adaptive conformal methods ?". For this we have the two metrics # :func:`~mapie.metrics.regression_ssc_score` and :func:`~mapie.metrics.hsic`. # - SSC (Size Stratified Coverage) is the maximum violation of the coverage : -# the intervals are grouped by width and the coverage is computed for each -# group. The lower coverage is the maximum coverage violation. An adaptive -# method is one where this maximum violation is as close as possible to the -# global coverage. If we interpret the result for the four methods here : -# CV+ seems to be the better one. +# the intervals are grouped by width and the coverage is computed for each +# group. The lower coverage is the maximum coverage violation. An adaptive +# method is one where this maximum violation is as close as possible to the +# global coverage. If we interpret the result for the four methods here : +# CV+ seems to be the better one. # - And with the hsic correlation coefficient, we have the -# same interpretation : :func:`~mapie.metrics.hsic` computes the correlation -# between the coverage indicator and the interval size, a value of 0 -# translates an independence between the two. +# same interpretation : :func:`~mapie.metrics.hsic` computes the correlation +# between the coverage indicator and the interval size, a value of 0 +# translates an independence between the two. # # We would like to highlight here the misinterpretation that can be made # with these metrics. 
In fact, here CV+ with the absolute residual score diff --git a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py index c0737c7ae..e8f368a56 100644 --- a/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py +++ b/examples/regression/2-advanced-analysis/plot_conformal_predictive_distribution.py @@ -54,7 +54,7 @@ ############################################################################## # 2. Defining a Conformal Predictive Distribution class with MAPIE -# ---------------------------------------------------------- +# ------------------------------------------------------------------ # # To be able to obtain the cumulative distribution function of # a prediction with MAPIE, we propose here to wrap the diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py index 51d97c8f4..a1e331fb8 100644 --- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py +++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py @@ -9,9 +9,12 @@ - How well do the MAPIE strategies capture the aleatoric uncertainty existing in the data? + - How do the prediction intervals estimated by the resampling strategies evolve for new *out-of-distribution* data ? + - How do the prediction intervals vary between regressor models ? + Throughout this tutorial, we estimate the prediction intervals first using a polynomial function, and then using a boosting model, and a simple neural network. diff --git a/mapie/calibration.py b/mapie/calibration.py index d15c83872..ea3834a38 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -34,10 +34,8 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): If ``None``, estimator defaults to a ``LogisticRegression`` instance. method: Optional[str] - Method to choose for calibration method. 
- Choose among: - - - "top_label", performs a calibration on the class with highest score + The only valid method is "top_label". + Performs a calibration on the class with highest score given both score and class, see section 2 of [1]. By default "top_label". @@ -54,7 +52,8 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): The cross-validation strategy to compute scores : - "split", performs a standard splitting into a calibration and a - test set. + test set. + - "prefit", assumes that ``estimator`` has been fitted already. All the data that are provided in the ``fit`` method are then used to calibrate the predictions through the score computation. diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py index f59084455..5ce0d799a 100644 --- a/mapie/conformity_scores/bounds/residuals.py +++ b/mapie/conformity_scores/bounds/residuals.py @@ -17,8 +17,8 @@ class ResidualNormalisedScore(BaseRegressionScore): """ Residual Normalised score. - The signed conformity score = (|y - y_pred|) / r_pred. r_pred being the - predicted residual (|y - y_pred|) of the base estimator. + The signed conformity score = abs(y - y_pred) / r_pred. r_pred being the + predicted residual abs(y - y_pred) of the base estimator. It is calculated by a model that learns to predict these residuals. The learning is done with the log of the residual and we use the exponential of the prediction to avoid negative values. diff --git a/mapie/conformity_scores/classification.py b/mapie/conformity_scores/classification.py index 00e397128..5dda679cf 100644 --- a/mapie/conformity_scores/classification.py +++ b/mapie/conformity_scores/classification.py @@ -61,7 +61,7 @@ def get_predictions( This method should be implemented by any subclass of the current class. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples, n_features) Observed feature values. 
@@ -73,7 +73,7 @@ def get_predictions( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of predictions. @@ -92,7 +92,7 @@ def get_conformity_score_quantiles( This method should be implemented by any subclass of the current class. - Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -104,7 +104,7 @@ def get_conformity_score_quantiles( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -125,7 +125,7 @@ def get_prediction_sets( This method should be implemented by any subclass of the current class. - Parameters: + Parameters ----------- y_pred_proba: NDArray of shape (n_samples, n_classes) Target prediction. @@ -140,7 +140,7 @@ def get_prediction_sets( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -205,7 +205,7 @@ def predict_set( Compute the prediction sets on new samples based on the uncertainty of the target confidence set. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples,) The input data or samples for prediction. @@ -216,7 +216,7 @@ def predict_set( **kwargs: dict Additional keyword arguments. - Returns: + Returns -------- The output structure depend on the ``get_sets`` method. The prediction sets for each sample and each alpha level. diff --git a/mapie/conformity_scores/regression.py b/mapie/conformity_scores/regression.py index e6e098464..1803ff54c 100644 --- a/mapie/conformity_scores/regression.py +++ b/mapie/conformity_scores/regression.py @@ -23,18 +23,17 @@ class BaseRegressionScore(BaseConformityScore, metaclass=ABCMeta): Whether to consider the conformity score as symmetrical or not. 
consistency_check: bool, optional - Whether to check the consistency between the methods - ``get_estimation_distribution`` and ``get_conformity_scores``. - If ``True``, the following equality must be verified: - ``self.get_estimation_distribution( - y_pred, self.get_conformity_scores(y, y_pred, **kwargs), **kwargs - ) == y`` + Whether to check the consistency between the + methods ``get_estimation_distribution`` and ``get_conformity_scores``. + If ``True``, ``self.get_estimation_distribution`` called with params + ``y_pred`` and ``self.get_conformity_scores(y, y_pred, **kwargs)`` must + be equal to ``y``. By default ``True``. eps: float, optional - Threshold to consider when checking the consistency between - ``get_estimation_distribution`` and ``get_conformity_scores``. + Threshold to consider when checking the consistency + between ``get_estimation_distribution`` and ``get_conformity_scores``. It should be specified if ``consistency_check==True``. By default, it is defined by the default precision. @@ -390,7 +389,7 @@ def predict_set( Compute the prediction sets on new samples based on the uncertainty of the target confidence set. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples,) The input data or samples for prediction. @@ -401,7 +400,7 @@ def predict_set( **kwargs: dict Additional keyword arguments. - Returns: + Returns -------- The output structure depend on the ``get_bounds`` method. The prediction sets for each sample and each alpha level. diff --git a/mapie/conformity_scores/sets/aps.py b/mapie/conformity_scores/sets/aps.py index 8e5cb7d27..9847f8b7d 100644 --- a/mapie/conformity_scores/sets/aps.py +++ b/mapie/conformity_scores/sets/aps.py @@ -53,7 +53,7 @@ def get_predictions( """ Get predictions from an EnsembleClassifier. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples, n_features) Observed feature values. @@ -72,7 +72,7 @@ def get_predictions( By default ``"mean"``. 
- Returns: + Returns -------- NDArray Array of predictions. @@ -178,7 +178,7 @@ def get_conformity_score_quantiles( """ Get the quantiles of the conformity scores for each uncertainty level. - Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -197,7 +197,7 @@ def get_conformity_score_quantiles( By default ``"mean"``. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -222,7 +222,7 @@ def _compute_v_parameter( """ Compute the V parameters from Romano+(2020). - Parameters: + Parameters ----------- y_proba_last_cumsumed: NDArray of shape (n_samples, n_alpha) Cumulated score of the last included label. @@ -236,7 +236,7 @@ def _compute_v_parameter( predicition_sets: NDArray of shape (n_samples, n_alpha) Prediction sets. - Returns: + Returns -------- NDArray of shape (n_samples, n_alpha) Vs parameters. @@ -337,7 +337,7 @@ def get_prediction_sets( Generate prediction sets based on the probability predictions, the conformity scores and the uncertainty level. - Parameters: + Parameters ----------- y_pred_proba: NDArray of shape (n_samples, n_classes) Target prediction. @@ -365,7 +365,7 @@ def get_prediction_sets( By default, ``True``. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. diff --git a/mapie/conformity_scores/sets/lac.py b/mapie/conformity_scores/sets/lac.py index bf5bcbd01..e5f088158 100644 --- a/mapie/conformity_scores/sets/lac.py +++ b/mapie/conformity_scores/sets/lac.py @@ -87,7 +87,7 @@ def get_predictions( """ Get predictions from an EnsembleClassifier. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples, n_features) Observed feature values. @@ -106,7 +106,7 @@ def get_predictions( By default ``"mean"``. - Returns: + Returns -------- NDArray Array of predictions. @@ -131,7 +131,7 @@ def get_conformity_score_quantiles( """ Get the quantiles of the conformity scores for each uncertainty level. 
- Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -150,7 +150,7 @@ def get_conformity_score_quantiles( By default ``"mean"``. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -180,7 +180,7 @@ def get_prediction_sets( Generate prediction sets based on the probability predictions, the conformity scores and the uncertainty level. - Parameters: + Parameters ----------- y_pred_proba: NDArray of shape (n_samples, n_classes) Target prediction. @@ -202,7 +202,7 @@ def get_prediction_sets( By default ``"mean"``. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. diff --git a/mapie/conformity_scores/sets/naive.py b/mapie/conformity_scores/sets/naive.py index 19b0e42c9..09bafa181 100644 --- a/mapie/conformity_scores/sets/naive.py +++ b/mapie/conformity_scores/sets/naive.py @@ -67,7 +67,7 @@ def get_predictions( """ Get predictions from an EnsembleClassifier. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples, n_features) Observed feature values. @@ -79,7 +79,7 @@ def get_predictions( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of predictions. @@ -101,7 +101,7 @@ def get_conformity_score_quantiles( """ Get the quantiles of the conformity scores for each uncertainty level. - Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -113,7 +113,7 @@ def get_conformity_score_quantiles( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -241,7 +241,7 @@ def get_prediction_sets( Generate prediction sets based on the probability predictions, the conformity scores and the uncertainty level. 
- Parameters: + Parameters ----------- y_pred_proba: NDArray of shape (n_samples, n_classes) Target prediction. @@ -256,7 +256,7 @@ def get_prediction_sets( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. diff --git a/mapie/conformity_scores/sets/raps.py b/mapie/conformity_scores/sets/raps.py index 1c39aed8f..435c135ba 100644 --- a/mapie/conformity_scores/sets/raps.py +++ b/mapie/conformity_scores/sets/raps.py @@ -388,7 +388,7 @@ def get_conformity_score_quantiles( """ Get the quantiles of the conformity scores for each uncertainty level. - Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -435,7 +435,7 @@ def get_conformity_score_quantiles( By default, "None" but must be set to work. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -549,7 +549,7 @@ def _compute_v_parameter( """ Compute the V parameters from Angelopoulos+(2020). - Parameters: + Parameters ----------- y_proba_last_cumsumed: NDArray of shape (n_samples, n_alpha) Cumulated score of the last included label. @@ -563,7 +563,7 @@ def _compute_v_parameter( predicition_sets: NDArray of shape (n_samples, n_alpha) Prediction sets. - Returns: + Returns -------- NDArray of shape (n_samples, n_alpha) Vs parameters. diff --git a/mapie/conformity_scores/sets/topk.py b/mapie/conformity_scores/sets/topk.py index 4e86a2671..cfad29a0a 100644 --- a/mapie/conformity_scores/sets/topk.py +++ b/mapie/conformity_scores/sets/topk.py @@ -92,7 +92,7 @@ def get_predictions( This method should be implemented by any subclass of the current class. - Parameters: + Parameters ----------- X: NDArray of shape (n_samples, n_features) Observed feature values. @@ -104,7 +104,7 @@ def get_predictions( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. 
- Returns: + Returns -------- NDArray Array of predictions. @@ -126,7 +126,7 @@ def get_conformity_score_quantiles( """ Get the quantiles of the conformity scores for each uncertainty level. - Parameters: + Parameters ----------- conformity_scores: NDArray of shape (n_samples,) Conformity scores for each sample. @@ -138,7 +138,7 @@ def get_conformity_score_quantiles( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. @@ -157,7 +157,7 @@ def get_prediction_sets( Generate prediction sets based on the probability predictions, the conformity scores and the uncertainty level. - Parameters: + Parameters ----------- y_pred_proba: NDArray of shape (n_samples, n_classes) Target prediction. @@ -172,7 +172,7 @@ def get_prediction_sets( estimator: EnsembleClassifier Estimator that is fitted to predict y from X. - Returns: + Returns -------- NDArray Array of quantiles with respect to alpha_np. diff --git a/mapie/metrics.py b/mapie/metrics.py index 20c5065f0..9fb2b0938 100644 --- a/mapie/metrics.py +++ b/mapie/metrics.py @@ -41,7 +41,7 @@ def regression_coverage_score( Effective coverage obtained by the prediction intervals. Examples - -------- + --------- >>> from mapie.metrics import regression_coverage_score >>> import numpy as np >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5]) @@ -1175,8 +1175,8 @@ def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float: The Journal of Machine Learning Research. 2022 Jan 1;23(1):15886-940. - Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import kolmogorov_smirnov_statistic >>> y_true = np.array([0, 1, 0, 1, 0]) @@ -1231,8 +1231,8 @@ def kolmogorov_smirnov_cdf(x: float) -> float: Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. 
- Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import kolmogorov_smirnov_cdf >>> print(np.round(kolmogorov_smirnov_cdf(1), 4)) @@ -1282,8 +1282,8 @@ def kolmogorov_smirnov_p_value(y_true: NDArray, y_score: NDArray) -> float: Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. - Example - ------- + Examples + -------- >>> import pandas as pd >>> from mapie.metrics import kolmogorov_smirnov_p_value >>> y_true = np.array([1, 0, 1, 0, 1, 0]) @@ -1333,8 +1333,8 @@ def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float: The Journal of Machine Learning Research. 2022 Jan 1;23(1):15886-940. - Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import kuiper_statistic >>> y_true = np.array([0, 1, 0, 1, 0]) @@ -1388,8 +1388,8 @@ def kuiper_cdf(x: float) -> float: Ann. Math. Statist. 22 (3) 427 - 432 September, 1951. - Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import kuiper_cdf >>> print(np.round(kuiper_cdf(1), 4)) @@ -1449,8 +1449,8 @@ def kuiper_p_value(y_true: NDArray, y_score: NDArray) -> float: Ann. Math. Statist. 22 (3) 427 - 432 September, 1951. - Example - ------- + Examples + -------- >>> import pandas as pd >>> from mapie.metrics import kuiper_p_value >>> y_true = np.array([1, 0, 1, 0, 1, 0]) @@ -1499,8 +1499,8 @@ def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float: Statistics in medicine. 1986 Sep;5(5):421-33. - Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import spiegelhalter_statistic >>> y_true = np.array([0, 1, 0, 1, 0]) @@ -1556,8 +1556,8 @@ def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float: Statistics in medicine. 1986 Sep;5(5):421-33. 
- Example - ------- + Examples + -------- >>> import numpy as np >>> from mapie.metrics import spiegelhalter_p_value >>> y_true = np.array([1, 0, 1, 0, 1, 0]) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index e30646ab3..df04a41d1 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -686,8 +686,8 @@ def predict( - NDArray of shape (n_samples,) if ``alpha`` is ``None``. - Tuple[NDArray, NDArray] of shapes (n_samples,) and (n_samples, 2, n_alpha) if ``alpha`` is not ``None``. - - [:, 0, :]: Lower bound of the prediction interval. - - [:, 1, :]: Upper bound of the prediction interval. + - [:, 0, :]: Lower bound of the prediction interval. + - [:, 1, :]: Upper bound of the prediction interval. """ check_is_fitted(self, self.fit_attributes) check_defined_variables_predict_cqr(ensemble, alpha) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index aa6656e81..8d6e10ffc 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -67,9 +67,9 @@ class MapieRegressor(BaseEstimator, RegressorMixin): ``sklearn.model_selection.LeaveOneOut()``. - CV splitter: any ``sklearn.model_selection.BaseCrossValidator`` Main variants are: - - ``sklearn.model_selection.LeaveOneOut`` (jackknife), - - ``sklearn.model_selection.KFold`` (cross-validation), - - ``subsample.Subsample`` object (bootstrap). + - ``sklearn.model_selection.LeaveOneOut`` (jackknife), + - ``sklearn.model_selection.KFold`` (cross-validation), + - ``subsample.Subsample`` object (bootstrap). - ``"split"``, does not involve cross-validation but a division of the data into training and calibration subsets. The splitter used is the following: ``sklearn.model_selection.ShuffleSplit``. @@ -624,8 +624,8 @@ def predict( - NDArray of shape (n_samples,) if ``alpha`` is ``None``. - Tuple[NDArray, NDArray] of shapes (n_samples,) and (n_samples, 2, n_alpha) if ``alpha`` is not ``None``. 
- - [:, 0, :]: Lower bound of the prediction interval. - - [:, 1, :]: Upper bound of the prediction interval. + - [:, 0, :]: Lower bound of the prediction interval. + - [:, 1, :]: Upper bound of the prediction interval. """ # Checks if hasattr(self, '_predict_params'): diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index a2c76ce95..e4e6f5520 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -451,8 +451,8 @@ def predict( - NDArray of shape (n_samples,) if ``alpha`` is ``None``. - Tuple[NDArray, NDArray] of shapes (n_samples,) and (n_samples, 2, n_alpha) if ``alpha`` is not ``None``. - - [:, 0, :]: Lower bound of the prediction interval. - - [:, 1, :]: Upper bound of the prediction interval. + - [:, 0, :]: Lower bound of the prediction interval. + - [:, 1, :]: Upper bound of the prediction interval. """ if alpha is None: super().predict( diff --git a/mapie/subsample.py b/mapie/subsample.py index ed3c3ba4e..88293bc5e 100644 --- a/mapie/subsample.py +++ b/mapie/subsample.py @@ -170,6 +170,7 @@ def split( The training set indices for that split. test : NDArray of shape (n_indices_test,) The testing set indices for that split. + Raises ------ ValueError