DOC - Fix most documentation build warnings (#539)
* DOC - Fixing a good 90% of existing warnings

* DOC - Update contributing guidelines regarding documentation
Valentin-Laurent authored Nov 20, 2024
1 parent c2e08c6 commit 58e839c
Showing 23 changed files with 132 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -28,4 +28,4 @@ Please describe the tests that you ran to verify your changes. Provide instructi
- [ ] Typing passes successfully : `make type-check`
- [ ] Unit tests pass successfully : `make tests`
- [ ] Coverage is 100% : `make coverage`
- - [ ] Documentation builds successfully : `make doc`
+ - [ ] Documentation builds successfully and without warnings : `make doc`
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
@@ -48,7 +48,7 @@ Finally, install `mapie` in development mode:
Documenting your change
-----------------------

- If you're adding a class or a function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention <https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html>`_, so please do too.
+ If you're adding a public class or function, then you'll need to add a docstring with a doctest. We follow the `numpy docstring convention <https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html>`_, so please do too.
Any estimator should follow the `scikit-learn API <https://scikit-learn.org/stable/developers/develop.html>`_, so please follow these guidelines.
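For illustration, a minimal docstring in that convention could look like the following (a sketch only; the function itself is made up):

def add_one(x: float) -> float:
    """Return ``x`` increased by one.

    Parameters
    ----------
    x: float
        Value to increment.

    Returns
    -------
    float
        The value ``x + 1``.

    Examples
    --------
    >>> add_one(1.0)
    2.0
    """
    return x + 1.0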

In order to build the documentation locally, you first need to install some dependencies:
1 change: 1 addition & 0 deletions HISTORY.rst
@@ -8,6 +8,7 @@ History
* Fix issue 525 in contribution guidelines with syntax errors in hyperlinks and other formatting issues.
* Bump wheel version to avoid known security vulnerabilities
* Fix issue 495 to center correctly the prediction intervals
+ * Fix most documentation build warnings

0.9.1 (2024-09-13)
------------------
3 changes: 3 additions & 0 deletions doc/conf.py
@@ -67,6 +67,9 @@
# generate autosummary even if no references
autosummary_generate = True


+ autosectionlabel_prefix_document = True
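# (Note: prefixing each section label with its document name keeps
# identical section titles on different pages from raising
# duplicate-label warnings.)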

# The suffix of source filenames.
source_suffix = ".rst"

(diff of another file; file path not shown in this view)
@@ -36,7 +36,7 @@

##############################################################################
# 1. Create the noisy dataset
- # -----------------------------
+ # ----------------------------------------------------------------------------
# We create a dataset with 10 groups, each of those groups having a different
# level of noise.

@@ -87,7 +87,7 @@

##############################################################################
# 2. Split the dataset into a training set, a calibration set, and a test set.
- # -----------------------------
+ # ----------------------------------------------------------------------------

X_train_temp, X_test, y_train_temp, y_test = train_test_split(
X, y, test_size=0.2, random_state=0
@@ -119,15 +119,15 @@

##############################################################################
# 3. Fit a random forest regressor on the training set.
- # -----------------------------
+ # ----------------------------------------------------------------------------

rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, y_train)


##############################################################################
# 4. Fit a MapieRegressor and a MondrianCP on the calibration set.
- # -----------------------------
+ # ----------------------------------------------------------------------------

mapie_regressor = MapieRegressor(rf, cv="prefit")
mondrian_regressor = MondrianCP(MapieRegressor(rf, cv="prefit"))
@@ -137,7 +137,7 @@

##############################################################################
# 5. Predict the prediction intervals on the test set with both methods.
- # -----------------------------
+ # ----------------------------------------------------------------------------

_, y_pss_split = mapie_regressor.predict(X_test, alpha=.1)
_, y_pss_mondrian = mondrian_regressor.predict(
@@ -147,7 +147,7 @@

##############################################################################
# 6. Compare the coverage by partition, plot both methods side by side.
- # -----------------------------
+ # ----------------------------------------------------------------------------

coverages = {}
for group in np.unique(partition_test):
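The comparison loop is truncated in this view. A minimal sketch of how the per-group coverage computation could continue (assuming `mapie.metrics.regression_coverage_score` and interval arrays of shape `(n_samples, 2, n_alpha)`; names are illustrative):

from mapie.metrics import regression_coverage_score

coverages = {}
for group in np.unique(partition_test):
    mask = partition_test == group  # samples belonging to this partition
    coverages[group] = {
        # fraction of y_test values falling inside [lower, upper]
        "split": regression_coverage_score(
            y_test[mask], y_pss_split[mask, 0, 0], y_pss_split[mask, 1, 0]
        ),
        "mondrian": regression_coverage_score(
            y_test[mask], y_pss_mondrian[mask, 0, 0], y_pss_mondrian[mask, 1, 0]
        ),
    }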
(diff of another file; file path not shown in this view)
@@ -27,7 +27,7 @@

##############################################################################
# 1. Construction of the dataset
- # -----------------------------
+ # ----------------------------------------------------------------------------
# We use a two-dimensional toy dataset with three possible labels. The idea
# is to create a triangle where the observations on the edges have only one
# label, those on the vertices have two labels (those of the two edges) and the
@@ -94,21 +94,21 @@

##############################################################################
# 2 Recall control risk with CRC and RCPS
- # ---------------------------------------
+ # ----------------------------------------------------------------------------
# 2.1 Fitting MapieMultiLabelClassifier
- # ------------------------------------
+ # ----------------------------------------------------------------------------
# MapieMultiLabelClassifier will be fitted with RCPS and CRC methods. For the
# RCPS method, we will test all three Upper Confidence Bounds (Hoeffding,
# Bernstein and Waudby-Smith–Ramdas).
# The two methods give two different guarantees on the risk:
#
# * RCPS: :math:`P(R(\mathcal{T}_{\hat{\lambda}})\leq\alpha)\geq 1-\delta`
#   where :math:`R(\mathcal{T}_{\hat{\lambda}})`
#   is the risk we want to control and :math:`\alpha` is the desired risk
#
# * CRC: :math:`\mathbb{E}\left[L_{n+1}(\hat{\lambda})\right] \leq \alpha`
#   where :math:`L_{n+1}(\hat{\lambda})` is the risk of a new observation and
#   :math:`\alpha` is the desired risk
#
# In both cases, the objective of the method is to find the optimal value of
# :math:`\lambda` (threshold above which we consider a label as being present)
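#
# A minimal usage sketch (``clf``, the keyword values and the exact
# signature are assumptions for illustration, not guaranteed API):

mapie_recall = MapieMultiLabelClassifier(
    estimator=clf, method="rcps", metric_control="recall"
)
mapie_recall.fit(X_cal, y_cal)  # calibrates the threshold lambda
# alpha: target risk, delta: confidence level, bound: choice of UCB
_, y_pred_sets = mapie_recall.predict(
    X_test, alpha=0.1, delta=0.1, bound="bernstein"
)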
@@ -148,17 +148,17 @@

##############################################################################
# 2.2. Results
- # ----------
+ # ----------------------------------------------------------------------------
# To check the results of the methods, we propose two types of plots:
#
- # * Plots where the confidence level varies. Here two metrics are plotted
- #   for each method and for each UCB
- #   * The actual recall (which should be always near to the required one):
- #     we can see that they are close to each other.
- #   * The value of the threshold: we see that the threshold is decreasing as
- #     :math:`1 - \alpha` increases, which is what is expected because a
- #     smaller threshold will give larger prediction sets, hence a larger
- #     recall.
+ # 1 - Plots where the confidence level varies. Here two metrics are plotted
+ #     for each method and for each UCB:
+ #     * The actual recall (which should always be close to the required one):
+ #       we can see that they are indeed close.
+ #     * The value of the threshold: the threshold decreases as
+ #       :math:`1 - \alpha` increases, which is expected, because a
+ #       smaller threshold gives larger prediction sets, and hence a larger
+ #       recall.
#

vars_y = [recalls, thresholds]
@@ -177,15 +177,15 @@
plt.show()

##############################################################################
- # * Plots where we choose a specific risk value (0.1 in our case) and look at
- #   the average risk, the UCB of the risk (for RCPS methods) and the choice of
- #   the threshold :math:`\lambda`
- #   * We can see that among the RCPS methods, the Bernstein method
- #     gives the best results as for a given value of :math:`\alpha`
- #     as we are above the required recall but with a larger value of
- #     :math:`\lambda` than the two others bounds.
- #   * The CRC method gives the best results since it guarantees the coverage
- #     with a larger threshold.
+ # 2 - Plots where we choose a specific risk value (0.1 in our case) and look
+ #     at the average risk, the UCB of the risk (for RCPS methods) and the
+ #     choice of the threshold :math:`\lambda`:
+ #     * Among the RCPS methods, the Bernstein method gives the best results:
+ #       for a given value of :math:`\alpha`, we stay above the required
+ #       recall while keeping a larger value of :math:`\lambda` than with the
+ #       two other bounds.
+ #     * The CRC method gives the best results overall, since it guarantees
+ #       the coverage with a larger threshold.

fig, axs = plt.subplots(
1,
@@ -216,9 +216,9 @@

##############################################################################
# 3. Precision control risk with LTT
- # ------------------
+ # ----------------------------------------------------------------------------
# 3.1 Fitting MapieMultilabelClassifier
- # -------------------------------------
+ # ----------------------------------------------------------------------------
#
# In this part, we will use LTT to control precision.
# Unlike the two previous methods, LTT can handle non-monotonic losses.
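#
# A minimal sketch of precision control with LTT (names and values are
# illustrative assumptions, not guaranteed API):

mapie_precision = MapieMultiLabelClassifier(
    estimator=clf, method="ltt", metric_control="precision"
)
mapie_precision.fit(X_cal, y_cal)
# alpha: maximum tolerated precision risk, delta: confidence level
_, y_pred_sets_ltt = mapie_precision.predict(X_test, alpha=0.1, delta=0.1)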
@@ -266,7 +266,7 @@

##############################################################################
# 3.2 Valid parameters for precision control
- # ------------------------------------------
+ # ----------------------------------------------------------------------------
# We can see that not all :math:`\lambda` such that the risk is below the
# orange line are chosen by the procedure. However, all the lambdas in the
# red rectangle verify family-wise error rate control and allow to
(diff of another file; file path not shown in this view)
@@ -171,15 +171,15 @@ def sin_with_controlled_noise(
# adaptive conformal methods?". For this, we have the two metrics
# :func:`~mapie.metrics.regression_ssc_score` and :func:`~mapie.metrics.hsic`.
# - SSC (Size Stratified Coverage) is the maximum violation of the coverage:
#   the intervals are grouped by width and the coverage is computed for each
#   group. The lowest group coverage gives the maximum coverage violation. An
#   adaptive method is one where this maximum violation is as close as
#   possible to the global coverage. If we interpret the result for the four
#   methods here, CV+ seems to be the best one.
# - And with the hsic correlation coefficient, we have the
#   same interpretation: :func:`~mapie.metrics.hsic` computes the correlation
#   between the coverage indicator and the interval size; a value of 0
#   indicates independence between the two (see the sketch below).
#
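# A minimal sketch of how these two metrics can be computed (argument
# names and values are assumptions for illustration):

from mapie.metrics import regression_ssc_score, hsic

# y_pis: intervals of shape (n_samples, 2, n_alpha) returned by
# MapieRegressor.predict together with the point predictions
max_violation = regression_ssc_score(y_test, y_pis, num_bins=3)
hsic_coefficient = hsic(y_test, y_pis)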
# We would like to highlight here the misinterpretation that can be made
# with these metrics. In fact, here CV+ with the absolute residual score
(diff of another file; file path not shown in this view)
@@ -54,7 +54,7 @@

##############################################################################
# 2. Defining a Conformal Predictive Distribution class with MAPIE
- # ----------------------------------------------------------
+ # ------------------------------------------------------------------
#
# To be able to obtain the cumulative distribution function of
# a prediction with MAPIE, we propose here to wrap the
(diff of another file; file path not shown in this view)
@@ -9,9 +9,12 @@
- How well do the MAPIE strategies capture the aleatoric uncertainty
existing in the data?
- How do the prediction intervals estimated by the resampling strategies
evolve for new *out-of-distribution* data?
- How do the prediction intervals vary between regressor models?
Throughout this tutorial, we estimate the prediction intervals first using
a polynomial function, then using a boosting model and a simple neural
network.
9 changes: 4 additions & 5 deletions mapie/calibration.py
@@ -34,10 +34,8 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
If ``None``, estimator defaults to a ``LogisticRegression`` instance.
method: Optional[str]
Method to use for calibration.
- Choose among:
- - "top_label", performs a calibration on the class with highest score
+ The only valid method is "top_label".
+ Performs a calibration on the class with highest score
given both score and class, see section 2 of [1].
By default "top_label".
@@ -54,7 +52,8 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
The cross-validation strategy to compute scores:
- "split", performs a standard splitting into a calibration and a
  test set.
- "prefit", assumes that ``estimator`` has been fitted already.
All the data that are provided in the ``fit`` method are then used
to calibrate the predictions through the score computation.
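A minimal usage sketch (variable names are illustrative; ``cv="prefit"`` assumes a pre-fitted classifier, as described above):

from sklearn.linear_model import LogisticRegression
from mapie.calibration import MapieCalibrator

clf = LogisticRegression().fit(X_train, y_train)
mapie_cal = MapieCalibrator(estimator=clf, method="top_label", cv="prefit")
mapie_cal.fit(X_calib, y_calib)  # calibrates the top-label scores
y_calibrated = mapie_cal.predict_proba(X_test)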
4 changes: 2 additions & 2 deletions mapie/conformity_scores/bounds/residuals.py
@@ -17,8 +17,8 @@ class ResidualNormalisedScore(BaseRegressionScore):
"""
Residual Normalised score.
- The signed conformity score = (|y - y_pred|) / r_pred. r_pred being the
- predicted residual (|y - y_pred|) of the base estimator.
+ The signed conformity score = abs(y - y_pred) / r_pred, r_pred being the
+ predicted residual abs(y - y_pred) of the base estimator.
It is calculated by a model that learns to predict these residuals.
The learning is done with the log of the residual and we use the
exponential of the prediction to avoid negative values.
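An illustrative computation of this score (a sketch, not MAPIE's internal code; ``residual_model`` is hypothetical):

import numpy as np

# residual_model was trained to predict log(abs(y - y_pred)) from X
r_pred = np.exp(residual_model.predict(X))  # exp keeps r_pred positive
conformity_scores = np.abs(y - y_pred) / r_pred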
16 changes: 8 additions & 8 deletions mapie/conformity_scores/classification.py
@@ -61,7 +61,7 @@ def get_predictions(
This method should be implemented by any subclass of the current class.
- Parameters:
+ Parameters
-----------
X: NDArray of shape (n_samples, n_features)
Observed feature values.
@@ -73,7 +73,7 @@
estimator: EnsembleClassifier
Estimator that is fitted to predict y from X.
- Returns:
+ Returns
--------
NDArray
Array of predictions.
@@ -92,7 +92,7 @@ def get_conformity_score_quantiles(
This method should be implemented by any subclass of the current class.
- Parameters:
+ Parameters
-----------
conformity_scores: NDArray of shape (n_samples,)
Conformity scores for each sample.
@@ -104,7 +104,7 @@
estimator: EnsembleClassifier
Estimator that is fitted to predict y from X.
- Returns:
+ Returns
--------
NDArray
Array of quantiles with respect to alpha_np.
@@ -125,7 +125,7 @@ def get_prediction_sets(
This method should be implemented by any subclass of the current class.
- Parameters:
+ Parameters
-----------
y_pred_proba: NDArray of shape (n_samples, n_classes)
Target prediction.
@@ -140,7 +140,7 @@
estimator: EnsembleClassifier
Estimator that is fitted to predict y from X.
- Returns:
+ Returns
--------
NDArray
Array of quantiles with respect to alpha_np.
@@ -205,7 +205,7 @@ def predict_set(
Compute the prediction sets on new samples based on the uncertainty of
the target confidence set.
- Parameters:
+ Parameters
-----------
X: NDArray of shape (n_samples,)
The input data or samples for prediction.
@@ -216,7 +216,7 @@
**kwargs: dict
Additional keyword arguments.
- Returns:
+ Returns
--------
The output structure depends on the ``get_sets`` method.
The prediction sets for each sample and each alpha level.
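For context, prediction sets like these are what the user-facing classification API returns; a hedged sketch (variable names are illustrative):

from mapie.classification import MapieClassifier

mapie_clf = MapieClassifier(estimator=fitted_clf, cv="prefit")
mapie_clf.fit(X_calib, y_calib)
# y_ps has shape (n_samples, n_classes, n_alpha): boolean set membership
y_pred, y_ps = mapie_clf.predict(X_test, alpha=[0.1, 0.05])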
19 changes: 9 additions & 10 deletions mapie/conformity_scores/regression.py
@@ -23,18 +23,17 @@ class BaseRegressionScore(BaseConformityScore, metaclass=ABCMeta):
Whether to consider the conformity score as symmetrical or not.
consistency_check: bool, optional
- Whether to check the consistency between the methods
- ``get_estimation_distribution`` and ``get_conformity_scores``.
- If ``True``, the following equality must be verified:
- ``self.get_estimation_distribution(
- y_pred, self.get_conformity_scores(y, y_pred, **kwargs), **kwargs
- ) == y``
+ Whether to check the consistency between the
+ methods ``get_estimation_distribution`` and ``get_conformity_scores``.
+ If ``True``, ``self.get_estimation_distribution`` called with params
+ ``y_pred`` and ``self.get_conformity_scores(y, y_pred, **kwargs)`` must
+ be equal to ``y``.
By default ``True``.
eps: float, optional
- Threshold to consider when checking the consistency between
- ``get_estimation_distribution`` and ``get_conformity_scores``.
+ Threshold to consider when checking the consistency
+ between ``get_estimation_distribution`` and ``get_conformity_scores``.
It should be specified if ``consistency_check==True``.
By default, it is defined by the default precision.
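The check can be illustrated with a signed residual score (a sketch with made-up numbers, not MAPIE's internal code):

import numpy as np

y_pred = np.array([1.0, 2.0])
y = np.array([1.5, 1.8])
scores = y - y_pred              # plays the role of get_conformity_scores
reconstructed = y_pred + scores  # plays the role of get_estimation_distribution
assert np.allclose(reconstructed, y)  # the equality being verified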
@@ -390,7 +389,7 @@ def predict_set(
Compute the prediction sets on new samples based on the uncertainty of
the target confidence set.
- Parameters:
+ Parameters
-----------
X: NDArray of shape (n_samples,)
The input data or samples for prediction.
@@ -401,7 +400,7 @@
**kwargs: dict
Additional keyword arguments.
- Returns:
+ Returns
--------
The output structure depends on the ``get_bounds`` method.
The prediction sets for each sample and each alpha level.
(Diff truncated: the remaining changed files are not shown.)
