diff --git a/doc/images/quickstart_1.png b/doc/images/quickstart_1.png
index 10d37164..0d74e1bf 100644
Binary files a/doc/images/quickstart_1.png and b/doc/images/quickstart_1.png differ
diff --git a/doc/quick_start.rst b/doc/quick_start.rst
index f556ac0b..7ef35f8a 100644
--- a/doc/quick_start.rst
+++ b/doc/quick_start.rst
@@ -36,18 +36,24 @@ To install directly from the github repository :
 Let us start with a basic regression problem.
 Here, we generate one-dimensional noisy data that we fit with a linear model.
 
+..
+    Comment to developers: the following piece of code is duplicated in `examples/regression/1-quickstart/plot_toy_model.py`.
+    When updating it, please replicate the changes to this other file.
+
 .. testcode::
 
     import numpy as np
-    from sklearn.linear_model import LinearRegression
     from sklearn.datasets import make_regression
     from sklearn.model_selection import train_test_split
 
-    regressor = LinearRegression()
-    X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59)
-    X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(X, y, test_size=0.5)
-    X_train, X_conformalize, y_train, y_conformalize = train_test_split(X_train_conformalize, y_train_conformalize,
-                                                                        test_size=0.5)
+    RANDOM_STATE = 42
+    X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
+    X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
+        X, y, test_size=0.5, random_state=RANDOM_STATE
+    )
+    X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+        X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
+    )
 
     # We follow a sequential ``fit``, ``conformalize``, and ``predict`` process.
     # We set the confidence level to estimate prediction intervals at approximately one and two
@@ -56,7 +62,6 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
     from mapie_v1.regression import SplitConformalRegressor
 
     mapie_regressor = SplitConformalRegressor(
-        regressor,
         confidence_level=[0.95, 0.68],
         prefit=False,
     )
@@ -95,9 +100,9 @@ Here, we generate one-dimensional noisy data that we fit with a linear model.
         alpha=0.2
     )
     plt.title(
-        f"Target and effective coverages for "
+        f"Effective coverage for "
        f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
-        f"Target and effective coverages for "
+        f"Effective coverage for "
        f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
     )
     plt.show()
diff --git a/examples/regression/1-quickstart/plot_toy_model.py b/examples/regression/1-quickstart/plot_toy_model.py
index 047ca02d..7b52b82c 100644
--- a/examples/regression/1-quickstart/plot_toy_model.py
+++ b/examples/regression/1-quickstart/plot_toy_model.py
@@ -1,54 +1,58 @@
 """
 =====================================================================================
-[Pre-v1] Plotting MAPIE prediction intervals with a toy dataset
+Plotting MAPIE prediction intervals with a toy dataset
 =====================================================================================
-**Note: we recently released MAPIE v1.0.0, which introduces breaking API changes.**
-**This notebook hasn't been updated to the new API yet.**
-
-An example plot of :class:`~mapie.regression.MapieRegressor` used
+An example plot of :class:`~mapie_v1.regression.SplitConformalRegressor` used
 in the Quickstart.
 """
 import numpy as np
 from matplotlib import pyplot as plt
 from sklearn.datasets import make_regression
-from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
 
 from mapie.metrics import regression_coverage_score
-from mapie.regression import MapieRegressor
+from mapie_v1.regression import SplitConformalRegressor
 
 RANDOM_STATE = 42
-regressor = LinearRegression()
-X, y = make_regression(
-    n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE
+X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=RANDOM_STATE)
+X_train_conformalize, X_test, y_train_conformalize, y_test = train_test_split(
+    X, y, test_size=0.5, random_state=RANDOM_STATE
+)
+X_train, X_conformalize, y_train, y_conformalize = train_test_split(
+    X_train_conformalize, y_train_conformalize, test_size=0.5, random_state=RANDOM_STATE
 )
 
-alpha = [0.05, 0.32]
-mapie = MapieRegressor(regressor, method="plus", random_state=RANDOM_STATE)
-mapie.fit(X, y)
-y_pred, y_pis = mapie.predict(X, alpha=alpha)
+confidence_level = [0.95, 0.68]
+mapie_regressor = SplitConformalRegressor(
+    confidence_level=confidence_level, prefit=False
+)
+mapie_regressor.fit(X_train, y_train)
+mapie_regressor.conformalize(X_conformalize, y_conformalize)
+y_pred, y_pred_interval = mapie_regressor.predict_interval(X_test)
 
 coverage_scores = [
-    regression_coverage_score(y, y_pis[:, 0, i], y_pis[:, 1, i])
-    for i, _ in enumerate(alpha)
+    regression_coverage_score(
+        y_test, y_pred_interval[:, 0, i], y_pred_interval[:, 1, i]
+    ) for i, _ in enumerate(confidence_level)
 ]
 
 plt.xlabel("x")
 plt.ylabel("y")
-plt.scatter(X, y, alpha=0.3)
-plt.plot(X, y_pred, color="C1")
-order = np.argsort(X[:, 0])
-plt.plot(X[order], y_pis[order][:, 0, 1], color="C1", ls="--")
-plt.plot(X[order], y_pis[order][:, 1, 1], color="C1", ls="--")
+plt.scatter(X_test, y_test, alpha=0.3)
+plt.plot(X_test, y_pred, color="C1")
+order = np.argsort(X_test[:, 0])
+plt.plot(X_test[order], y_pred_interval[order][:, 0, 1], color="C1", ls="--")
+plt.plot(X_test[order], y_pred_interval[order][:, 1, 1], color="C1", ls="--")
 plt.fill_between(
-    X[order].ravel(),
-    y_pis[order][:, 0, 0].ravel(),
-    y_pis[order][:, 1, 0].ravel(),
+    X_test[order].ravel(),
+    y_pred_interval[order][:, 0, 0].ravel(),
+    y_pred_interval[order][:, 1, 0].ravel(),
     alpha=0.2,
 )
 plt.title(
-    f"Target and effective coverages for "
-    f"alpha={alpha[0]:.2f}: ({1-alpha[0]:.3f}, {coverage_scores[0]:.3f})\n"
-    f"Target and effective coverages for "
-    f"alpha={alpha[1]:.2f}: ({1-alpha[1]:.3f}, {coverage_scores[1]:.3f})"
+    f"Effective coverage for "
+    f"confidence_level={confidence_level[0]:.2f}: {coverage_scores[0]:.3f}\n"
+    f"Effective coverage for "
+    f"confidence_level={confidence_level[1]:.2f}: {coverage_scores[1]:.3f}"
 )
 plt.show()