diff --git a/docs/src/methods_list.rst b/docs/src/methods_list.rst
deleted file mode 100644
index 18485f40a..000000000
--- a/docs/src/methods_list.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-.. _methods_list:
-
-
-===============
-List of methods
-===============
\ No newline at end of file
diff --git a/docs/src/model_agnostic_methods.rst b/docs/src/model_agnostic_methods.rst
new file mode 100644
index 000000000..b4383ce52
--- /dev/null
+++ b/docs/src/model_agnostic_methods.rst
@@ -0,0 +1,11 @@
+.. _model_agnostic_methods:
+
+
+======================
+Model-agnostic methods
+======================
+
+.. toctree::
+   :maxdepth: 2
+
+   model_agnostic_methods/pdp_importance.rst
\ No newline at end of file
diff --git a/docs/src/model_agnostic_methods/pdp_importance.rst b/docs/src/model_agnostic_methods/pdp_importance.rst
new file mode 100644
index 000000000..db1f7a1bc
--- /dev/null
+++ b/docs/src/model_agnostic_methods/pdp_importance.rst
@@ -0,0 +1,51 @@
+
+.. _pdp_importance:
+
+============================
+PDP-based feature importance
+============================
+
+A Partial Dependence Plot (PDP) :footcite:`friedman2001greedy` is a tool used to visualize
+the dependence between the target variable and a feature (or set of features) of interest.
+To see how PDPs are used to understand the relationship between features and the target,
+see :ref:`partial_dependence_plots`. Here we show how a marginal measure of feature
+importance can be derived from PDPs.
+
+A PDP is a plot of the partial dependence function, which is defined as
+
+.. math::
+    f_S(x_S) &= \mathbb{E}_{X_{-S}}\left[ f(x_S, X_{-S}) \right]\\
+    &= \int f(x_S, x_{-S}) d\mathbb{P}(X_{-S}),
+
+and estimated with
+
+.. math::
+    \bar{f}_S(x_S) \approx \frac{1}{n} \sum_{i=1}^n f(x_S, x_{-S,i}).
+
+The plot shows how much the predicted target changes when one (or several) features vary.
+Intuitively, the flatter the PDP, the less important the feature should be, as it appears
+to have little impact on the target. Conversely, the more a PDP varies, the more signal
+about the target the feature is likely to carry.
+Greenwell, Boehmke, and McCarthy :footcite:`greenwell2018simple` propose the following measure
+of feature importance for regression:
+
+.. math::
+    \Psi^{PDP}_S = \sqrt{ \frac{1}{K-1} \sum_{k=1}^K \left( \bar{f}_S(x_S^k) - \frac{1}{K} \sum_{j=1}^K \bar{f}_S(x_S^j) \right)^2 }.
+
+It is the sample standard deviation of the PDP curve evaluated at the :math:`K` unique
+grid values of the feature.
+
+For classification they suggest:
+
+.. math::
+    \Psi^{PDP}_S = \frac{ \max_k(\bar{f}_S(x_S^k)) - \min_k(\bar{f}_S(x_S^k)) }{4}.
+
+This estimates the standard deviation from the range of values: for a normal distribution,
+roughly 95% of the data lies within two standard deviations of the mean, so the range
+divided by four is a crude estimate of the standard deviation.
+
+
+References
+----------
+.. footbibliography::
+
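Both estimators above reduce to a few lines of NumPy. A minimal standalone sketch, not part of the diff; ``pdp_curve`` is an assumed array holding the averaged predictions over the grid::

    import numpy as np

    def pdp_importance(pdp_curve, categorical=False):
        # Greenwell et al. (2018) importance from an averaged PDP curve
        pdp_curve = np.asarray(pdp_curve, dtype=float)
        if categorical:
            # range rule: ~95% of a normal distribution lies within +/- 2 sd,
            # so the range spans roughly four standard deviations
            return (pdp_curve.max() - pdp_curve.min()) / 4
        # sample standard deviation of the curve around its mean
        return np.sqrt(((pdp_curve - pdp_curve.mean()) ** 2).sum() / (pdp_curve.size - 1))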
diff --git a/docs/src/visualization.rst b/docs/src/visualization.rst
index 6562ae28d..2874417b3 100644
--- a/docs/src/visualization.rst
+++ b/docs/src/visualization.rst
@@ -3,4 +3,9 @@
 =======================
 Tools for visualization
-=======================
\ No newline at end of file
+=======================
+
+.. toctree::
+   :maxdepth: 2
+
+   visualization/partial_dependence
\ No newline at end of file
diff --git a/docs/src/visualization/partial_dependence.rst b/docs/src/visualization/partial_dependence.rst
new file mode 100644
index 000000000..646074478
--- /dev/null
+++ b/docs/src/visualization/partial_dependence.rst
@@ -0,0 +1,53 @@
+
+.. _partial_dependence_plots:
+
+========================
+Partial Dependence plots
+========================
+
+Definition
+==========
+
+A Partial Dependence Plot (PDP) :footcite:`friedman2001greedy` is a tool used to visualize
+the dependence between the target variable and a feature (or set of features) of interest.
+For important features, it can be used to understand their relationship with the target,
+e.g. whether it is linear, monotonic or more complex.
+
+.. note::
+    PDPs are limited to a small subset of features (typically one or two) as it becomes
+    tricky to display more than three or four variables simultaneously.
+
+The partial dependence function that is plotted is defined as
+
+.. math::
+    f_S(x_S) &= \mathbb{E}_{X_{-S}}\left[ f(x_S, X_{-S}) \right]\\
+    &= \int f(x_S, x_{-S}) d\mathbb{P}(X_{-S}),
+
+where :math:`X_S` is the set of input features of interest, :math:`X_{-S}` is its complement
+and :math:`f(x_S, x_{-S})` is the learned decision function of the model of interest, evaluated
+on a sample :math:`x` whose values for the features in :math:`S` are :math:`x_S` and for the
+features in :math:`-S` are :math:`x_{-S}`. The expectation is taken marginally over the values
+of :math:`X_{-S}`.
+
+The partial dependence function :math:`f_S` is estimated by Monte-Carlo with
+:math:`\bar{f}_S` defined as
+
+.. math::
+    \bar{f}_S(x_S) \approx \frac{1}{n} \sum_{i=1}^n f(x_S, x_{-S,i}),
+
+where :math:`\{x_{-S,i}\}_{i=1}^n` are the values taken on the training set by the features
+in :math:`-S`.
+This approximation is equivalent to averaging all the Individual Conditional Expectation (ICE)
+curves. An ICE curve is the per-instance version of a PDP: it shows how the prediction for one
+sample of the dataset evolves when the features of interest change.
+Most plots will overlay all ICE curves on the same figure with the PDP highlighted.
+
+It is possible to derive a measure of feature importance from PDPs, which is explained in this
+section of the user guide: :ref:`pdp_importance`.
+
+Example(s)
+==========
+
+
+References
+----------
+.. footbibliography::
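The Monte-Carlo estimator above maps almost line for line onto code. A minimal standalone sketch, not part of the diff; it assumes a fitted ``model`` exposing ``predict`` and a NumPy design matrix ``X``::

    import numpy as np

    def partial_dependence_curve(model, X, feature, grid):
        # row k of `ice` holds the predictions with x_S fixed to grid[k] for
        # every sample, so each column of `ice` is an ICE curve
        ice = np.empty((len(grid), X.shape[0]))
        for k, value in enumerate(grid):
            X_mod = X.copy()
            X_mod[:, feature] = value  # impose the grid value on every sample
            ice[k] = model.predict(X_mod)
        return ice.mean(axis=1), ice  # the PDP curve and the ICE curves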
diff --git a/docs/tools/references.bib b/docs/tools/references.bib
index fe04dc12f..eadea946e 100644
--- a/docs/tools/references.bib
+++ b/docs/tools/references.bib
@@ -155,6 +155,15 @@ @article{fan2012variance
 year = {2012}
 }
 
+@article{friedman2001greedy,
+title = {Greedy function approximation: a gradient boosting machine},
+author = {Friedman, Jerome H},
+journal = {Annals of Statistics},
+volume = {29},
+number = {5},
+pages = {1189--1232},
+year = {2001},
+publisher = {JSTOR}
+}
+
 @article{gaonkar_deriving_2012,
 author = {Gaonkar, Bilwaj and Davatzikos, Christos},
 journal = {International Conference on Medical Image Computing and Computer-Assisted Intervention},
@@ -169,6 +178,24 @@ @article{gaonkar_deriving_2012
 year = {2012}
 }
 
+@article{greenwell2018simple,
+title = {A simple and effective model-based variable importance measure},
+author = {Greenwell, Brandon M and Boehmke, Bradley C and McCarthy, Andrew J},
+journal = {arXiv preprint arXiv:1805.04755},
+year = {2018}
+}
+
+@article{goldstein2015peeking,
+title = {Peeking inside the black box: Visualizing statistical learning with plots of individual conditional expectation},
+author = {Goldstein, Alex and Kapelner, Adam and Bleich, Justin and Pitkin, Emil},
+journal = {Journal of Computational and Graphical Statistics},
+volume = {24},
+number = {1},
+pages = {44--65},
+year = {2015},
+publisher = {Taylor \& Francis}
+}
+
 @article{hirschhorn2005genome,
 author = {Hirschhorn, Joel N and Daly, Mark J},
 journal = {Nature reviews genetics},
@@ -202,6 +229,17 @@ @article{liu2022fast
 year = {2022}
 }
 
+@book{molnar2025,
+title = {Interpretable Machine Learning},
+subtitle = {A Guide for Making Black Box Models Explainable},
+author = {Christoph Molnar},
+year = {2025},
+edition = {3},
+isbn = {978-3-911578-03-5},
+url = {https://christophm.github.io/interpretable-ml-book}
+}
+
 @article{meinshausen2009pvalues,
 author = {Nicolai Meinshausen, Lukas Meier and Peter Buhlmann},
 doi = {10.1198/jasa.2009.tm08647},
diff --git a/examples/plot_partial_dependence.py b/examples/plot_partial_dependence.py
new file mode 100644
index 000000000..aa396acad
--- /dev/null
+++ b/examples/plot_partial_dependence.py
@@ -0,0 +1,471 @@
+"""
+Partial Dependence and Individual Conditional Expectation Plots
+===============================================================
+This example is a modified version of the example proposed in scikit-learn:
+https://scikit-learn.org/1.7/auto_examples/inspection/plot_partial_dependence.html
+Partial dependence plots show the dependence between the target function [1]_
+and a set of features of interest, marginalizing over the values of all other
+features (the complement features). We restrict the importance computation to a
+single feature of interest, because extending this measure to several features
+jointly is not trivial.
+Similarly, an individual conditional expectation (ICE) plot :footcite:t:`goldstein2015peeking`
+shows the dependence between the target function and a feature of interest.
+However, unlike partial dependence plots, which show the average effect of the
+features of interest, ICE plots visualize the dependence of the prediction on a
+feature for each sample separately, with one line per sample.
+Only one feature of interest is supported for ICE plots.
+This example shows how to obtain partial dependence and ICE plots from a +:class:`~sklearn.neural_network.MLPRegressor` and a +:class:`~sklearn.ensemble.HistGradientBoostingRegressor` trained on the +bike sharing dataset. The example is inspired by :footcite:t:`molnar2025`. + +Notes +----- +.. [1] For classification you can think of it as the regression score before + the link function. + +References +---------- +.. footbibliography:: + +""" + +# %% +# Bike sharing dataset preprocessing +# ---------------------------------- +# +# We will use the bike sharing dataset. The goal is to predict the number of bike +# rentals using weather and season data as well as the datetime information. +from sklearn.datasets import fetch_openml + +bikes = fetch_openml("Bike_Sharing_Demand", version=2, as_frame=True) +# Make an explicit copy to avoid "SettingWithCopyWarning" from pandas +X, y = bikes.data.copy(), bikes.target + +# We use only a subset of the data to speed up the example. +X = X.iloc[::5, :] +y = y[::5] + +# %% +# The feature `"weather"` has a particularity: the category `"heavy_rain"` is a rare +# category. +X["weather"].value_counts() + +# %% +# Because of this rare category, we collapse it into `"rain"`. +X["weather"] = ( + X["weather"] + .astype(object) + .replace(to_replace="heavy_rain", value="rain") + .astype("category") +) + +# %% +# We now have a closer look at the `"year"` feature: +X["year"].value_counts() + +# %% +# We see that we have data from two years. We use the first year to train the +# model and the second year to test the model. +mask_training = X["year"] == 0.0 +X = X.drop(columns=["year"]) +X_train, y_train = X[mask_training], y[mask_training] +X_test, y_test = X[~mask_training], y[~mask_training] + +# %% +# We can check the dataset information to see that we have heterogeneous data types. We +# have to preprocess the different columns accordingly. +X_train.info() + +# %% +# From the previous information, we will consider the `category` columns as nominal +# categorical features. In addition, we will consider the date and time information as +# categorical features as well. +# +# We manually define the columns containing numerical and categorical +# features. +numerical_features = [ + "temp", + "feel_temp", + "humidity", + "windspeed", +] +categorical_features = X_train.columns.drop(numerical_features) + +# %% +# Before we go into the details regarding the different machine +# learning pipelines, we will try to get some additional intuition regarding the dataset +# that will be helpful to understand the model's statistical performance and results of +# the partial dependence analysis. +# +# We plot the average number of bike rentals by grouping the data by season and +# by year. 
+from itertools import product + +import matplotlib.pyplot as plt +import numpy as np + +days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat") +hours = tuple(range(24)) +xticklabels = [f"{day}\n{hour}:00" for day, hour in product(days, hours)] +xtick_start, xtick_period = 6, 12 + +fig, axs = plt.subplots(nrows=2, figsize=(8, 6), sharey=True, sharex=True) +average_bike_rentals = bikes.frame.groupby( + ["year", "season", "weekday", "hour"], observed=True +).mean(numeric_only=True)["count"] +for ax, (idx, df) in zip(axs, average_bike_rentals.groupby("year")): + df.groupby("season", observed=True).plot(ax=ax, legend=True) + + # decorate the plot + ax.set_xticks( + np.linspace( + start=xtick_start, + stop=len(xticklabels), + num=len(xticklabels) // xtick_period, + ) + ) + ax.set_xticklabels(xticklabels[xtick_start::xtick_period]) + ax.set_xlabel("") + ax.set_ylabel("Average number of bike rentals") + ax.set_title( + f"Bike rental for {'2010 (train set)' if idx == 0.0 else '2011 (test set)'}" + ) + ax.set_ylim(0, 1_000) + ax.set_xlim(0, len(xticklabels)) + ax.legend(loc=2) + +# %% +# The first striking difference between the train and test set is that the number of +# bike rentals is higher in the test set. For this reason, it will not be surprising to +# get a machine learning model that underestimates the number of bike rentals. We +# also observe that the number of bike rentals is lower during the spring season. In +# addition, we see that during working days, there is a specific pattern around 6-7 +# am and 5-6 pm with some peaks of bike rentals. We can keep in mind these different +# insights and use them to understand the partial dependence plot. +# +# Preprocessor for machine-learning models +# ---------------------------------------- +# +# Since we later use two different models, a +# :class:`~sklearn.neural_network.MLPRegressor` and a +# :class:`~sklearn.ensemble.HistGradientBoostingRegressor`, we create two different +# preprocessors, specific for each model. +# +# Preprocessor for the neural network model +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# We will use a :class:`~sklearn.preprocessing.QuantileTransformer` to scale the +# numerical features and encode the categorical features with a +# :class:`~sklearn.preprocessing.OneHotEncoder`. +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder, QuantileTransformer + +mlp_preprocessor = ColumnTransformer( + transformers=[ + ("num", QuantileTransformer(n_quantiles=100), numerical_features), + ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features), + ] +) +mlp_preprocessor + +# %% +# Preprocessor for the gradient boosting model +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# For the gradient boosting model, we leave the numerical features as-is and only +# encode the categorical features using a +# :class:`~sklearn.preprocessing.OrdinalEncoder`. +from sklearn.preprocessing import OrdinalEncoder + +hgbdt_preprocessor = ColumnTransformer( + transformers=[ + ("cat", OrdinalEncoder(), categorical_features), + ("num", "passthrough", numerical_features), + ], + sparse_threshold=1, + verbose_feature_names_out=False, +).set_output(transform="pandas") +hgbdt_preprocessor + +# %% +# 1-way partial dependence with different models +# ---------------------------------------------- +# +# In this section, we will compute 1-way partial dependence with two different +# machine-learning models: (i) a multi-layer perceptron and (ii) a +# gradient-boosting model. 
With these two models, we illustrate how to compute and +# interpret partial dependence plots (PDP) for both numerical and categorical +# features and individual conditional expectation (ICE). +# +# Multi-layer perceptron +# ~~~~~~~~~~~~~~~~~~~~~~ +# +# Let's fit a :class:`~sklearn.neural_network.MLPRegressor` and compute +# single-variable partial dependence plots. +from time import time + +from sklearn.neural_network import MLPRegressor +from sklearn.pipeline import make_pipeline + +print("Training MLPRegressor...") +tic = time() +mlp_model = make_pipeline( + mlp_preprocessor, + MLPRegressor( + hidden_layer_sizes=(30, 15), + learning_rate_init=0.01, + early_stopping=True, + random_state=0, + ), +) +mlp_model.fit(X_train, y_train) +print(f"done in {time() - tic:.3f}s") +print(f"Test R2 score: {mlp_model.score(X_test, y_test):.2f}") + +# %% +# We configured a pipeline using the preprocessor that we created specifically for the +# neural network and tuned the neural network size and learning rate to get a reasonable +# compromise between training time and predictive performance on a test set. +# +# Importantly, this tabular dataset has very different dynamic ranges for its +# features. Neural networks tend to be very sensitive to features with varying +# scales and forgetting to preprocess the numeric feature would lead to a very +# poor model. +# +# It would be possible to get even higher predictive performance with a larger +# neural network but the training would also be significantly more expensive. +# +# Note that it is important to check that the model is accurate enough on a +# test set before plotting the partial dependence since there would be little +# use in explaining the impact of a given feature on the prediction function of +# a model with poor predictive performance. In this regard, our MLP model works +# reasonably well. +# +# We will plot the averaged partial dependence. +import matplotlib.pyplot as plt + +from hidimstat.marginal.partial_dependence_plot import PartialDependencePlot + +common_params = { + "n_jobs": 2, + "grid_resolution": 20, +} + +print("Computing partial dependence plots...") +features_info = { + # features of interest + "features": ["temp", "humidity", "windspeed", "season", "weather", "hour"], + # information regarding categorical features + "categorical_features": categorical_features, +} +tic = time() +pdp = PartialDependencePlot( + mlp_model, + **features_info, + **common_params, +) +pdp.fit_importance(X=X_train) +# plot the figure +fig, axs = plt.subplots(ncols=3, nrows=2, figsize=(9, 8), constrained_layout=True) +for i, ax in enumerate(axs.ravel()): + _ = pdp.plot(feature_id=i, ax=ax, percentage_ice=0.1) + +print(f"done in {time() - tic:.3f}s") +_ = fig.suptitle( + ( + "Partial dependence of the number of bike rentals\n" + "for the bike rental dataset with an MLPRegressor" + ), + fontsize=16, +) + +# %% +# Gradient boosting +# ~~~~~~~~~~~~~~~~~ +# +# Let's now fit a :class:`~sklearn.ensemble.HistGradientBoostingRegressor` and +# compute the partial dependence on the same features. We also use the +# specific preprocessor we created for this model. 
+from sklearn.ensemble import HistGradientBoostingRegressor
+
+print("Training HistGradientBoostingRegressor...")
+tic = time()
+hgbdt_model = make_pipeline(
+    hgbdt_preprocessor,
+    HistGradientBoostingRegressor(
+        categorical_features=categorical_features,
+        random_state=0,
+        max_iter=50,
+    ),
+)
+hgbdt_model.fit(X_train, y_train)
+print(f"done in {time() - tic:.3f}s")
+print(f"Test R2 score: {hgbdt_model.score(X_test, y_test):.2f}")
+
+# %%
+# Here, we used the default hyperparameters for the gradient boosting model
+# without any preprocessing as tree-based models are naturally robust to
+# monotonic transformations of numerical features.
+#
+# Note that on this tabular dataset, Gradient Boosting Machines are both
+# significantly faster to train and more accurate than neural networks. It is
+# also significantly cheaper to tune their hyperparameters (the defaults tend
+# to work well while this is not often the case for neural networks).
+#
+# We will plot the partial dependence for some of the numerical and categorical
+# features.
+print("Computing partial dependence plots...")
+tic = time()
+fig, axs = plt.subplots(ncols=3, nrows=2, figsize=(9, 8), constrained_layout=True)
+pdp = PartialDependencePlot(
+    hgbdt_model,
+    **features_info,
+    **common_params,
+)
+pdp.fit_importance(X=X_train)
+for i, ax in enumerate(axs.ravel()):
+    _ = pdp.plot(feature_id=i, ax=ax, X=X_train)
+print(f"done in {time() - tic:.3f}s")
+_ = fig.suptitle(
+    (
+        "Partial dependence of the number of bike rentals\n"
+        "for the bike rental dataset with a gradient boosting"
+    ),
+    fontsize=16,
+)
+
+# %%
+# Analysis of the plots
+# ~~~~~~~~~~~~~~~~~~~~~
+#
+# We will first look at the PDPs for the numerical features. For both models, the
+# general trend of the PDP of the temperature is that the number of bike rentals is
+# increasing with temperature. We can make a similar analysis but with the opposite
+# trend for the humidity feature: the number of bike rentals decreases when the
+# humidity increases. Finally, we see the same trend for the wind speed feature. The
+# number of bike rentals is decreasing when the wind speed is increasing for both
+# models. We also observe that :class:`~sklearn.neural_network.MLPRegressor` has much
+# smoother predictions than :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
+#
+# Now, we will look at the partial dependence plots for the categorical features.
+#
+# We observe that the spring season is the lowest bar for the season feature. With the
+# weather feature, the rain category is the lowest bar. Regarding the hour feature,
+# we see two peaks around 7 am and 6 pm. These findings are in line with the
+# observations we made earlier on the dataset.
+#
+# However, it is worth noting that we are creating potentially meaningless
+# synthetic samples if features are correlated.
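+
+# %%
+# As a quick sanity check (a minimal sketch; the exact values depend on the
+# fitted model), we can also inspect the Greenwell-style importance scores that
+# ``fit_importance`` computed alongside the curves:
+for name, score in zip(features_info["features"], pdp.importances_):
+    print(f"{name}: {score:.3f}")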
+print("Computing partial dependence plots and individual conditional expectation...") +tic = time() +# sphinx_gallery_thumbnail_number = 4 +fig, axs = plt.subplots(ncols=2, figsize=(6, 4), sharey=True, constrained_layout=True) + +features_info = { + "features": ["temp", "humidity"], + # "centered": True, +} + +pdp = PartialDependencePlot( + hgbdt_model, + **features_info, + **common_params, +) +# plot the figure +pdp.fit_importance(X=X_train) +for i, ax in enumerate(axs.ravel()): + _ = pdp.plot(feature_id=i, ax=ax, X=X_train) +print(f"done in {time() - tic:.3f}s") +_ = fig.suptitle("ICE and PDP representations", fontsize=16) + +# %% +# We see that the ICE for the temperature feature gives us some additional information: +# Some of the ICE lines are flat while some others show a decrease of the dependence +# for temperature above 35 degrees Celsius. We observe a similar pattern for the +# humidity feature: some of the ICEs lines show a sharp decrease when the humidity is +# above 80%. +# +# Not all ICE lines are parallel, this indicates that the model finds +# interactions between features. We can repeat the experiment by constraining the +# gradient boosting model to not use any interactions between features using the +# parameter `interaction_cst`: +from sklearn.base import clone + +interaction_cst = [[i] for i in range(X_train.shape[1])] +hgbdt_model_without_interactions = ( + clone(hgbdt_model) + .set_params(histgradientboostingregressor__interaction_cst=interaction_cst) + .fit(X_train, y_train) +) +print(f"Test R2 score: {hgbdt_model_without_interactions.score(X_test, y_test):.2f}") + +# %% +fig, axs = plt.subplots(ncols=2, figsize=(6, 4), sharey=True, constrained_layout=True) + +# features_info["centered"] = False +pdp = PartialDependencePlot( + hgbdt_model_without_interactions, + **features_info, + **common_params, +) +pdp.fit_importance(X=X_train) +for i, ax in enumerate(axs.ravel()): + _ = pdp.plot(feature_id=i, ax=ax, X=X_train) +_ = fig.suptitle("ICE and PDP representations", fontsize=16) + +# %% +# .. _plt_partial_dependence_custom_values: +# +# Custom Inspection Points +# ~~~~~~~~~~~~~~~~~~~~~~~~ +# +# None of the examples so far specify _which_ points are evaluated to create the +# partial dependence plots. By default we use percentiles defined by the input dataset. +# In some cases it can be helpful to specify the exact points where one would like the +# model evaluated. For instance, if a user wants to test the model behavior on +# out-of-distribution data or compare two models that were fit on slightly different +# data. The `custom_values` parameter allows the user to pass in the values that they +# want the model to be evaluated on. This overrides the `grid_resolution` and +# `percentiles` parameters. 
+# Let's return to our gradient boosting example above,
+# but with custom values.
+
+print("Computing partial dependence plots with custom evaluation values...")
+tic = time()
+fig, axs = plt.subplots(ncols=2, figsize=(6, 4), sharey=True, constrained_layout=True)
+
+features_info = {
+    "features": ["temp", "humidity"],
+}
+
+pdp = PartialDependencePlot(
+    hgbdt_model,
+    **features_info,
+    **common_params,
+    # we set custom values for the temp feature only -
+    # all other features are evaluated based on the data
+    custom_values={"temp": np.linspace(0, 40, 10)},
+)
+pdp.fit_importance(X=X_train)
+for i, ax in enumerate(axs.ravel()):
+    _ = pdp.plot(feature_id=i, ax=ax, X=X_train)
+print(f"done in {time() - tic:.3f}s")
+_ = fig.suptitle(
+    (
+        "Partial dependence of the number of bike rentals\n"
+        "for the bike rental dataset with a gradient boosting"
+    ),
+    fontsize=16,
+)
+plt.show()
diff --git a/src/hidimstat/__init__.py b/src/hidimstat/__init__.py
index 3bb1ef1bf..3ff85084c 100644
--- a/src/hidimstat/__init__.py
+++ b/src/hidimstat/__init__.py
@@ -25,6 +25,11 @@
 from .statistical_tools.aggregation import quantile_aggregation
 
+# marginal methods
+from .marginal import PartialDependencePlot  # re-exported so it appears in the documentation
+from .marginal import PartialDependencePlot as PDP
+
+
 try:
     from ._version import __version__
 except ImportError:
@@ -49,4 +54,6 @@
     "CFI",
     "LOCO",
     "PFI",
+    # marginal methods
+    "PDP",
 ]
diff --git a/src/hidimstat/marginal/__init__.py b/src/hidimstat/marginal/__init__.py
new file mode 100644
index 000000000..2483ff3a1
--- /dev/null
+++ b/src/hidimstat/marginal/__init__.py
@@ -0,0 +1,3 @@
+from .partial_dependence_plot import PartialDependencePlot
+
+__all__ = ["PartialDependencePlot"]
diff --git a/src/hidimstat/marginal/partial_dependence_plot.py b/src/hidimstat/marginal/partial_dependence_plot.py
new file mode 100644
index 000000000..e9e2310d1
--- /dev/null
+++ b/src/hidimstat/marginal/partial_dependence_plot.py
@@ -0,0 +1,838 @@
+from collections.abc import Iterable
+from copy import deepcopy
+from tqdm import tqdm
+import warnings
+
+from joblib import Parallel, delayed
+import numpy as np
+from scipy import sparse
+from scipy.stats.mstats import mquantiles
+from sklearn.base import is_classifier, is_regressor
+from sklearn.utils import _safe_indexing, check_array, check_random_state
+from sklearn.utils._indexing import (
+    _determine_key_type,
+    _get_column_indices,
+    _safe_assign,
+)
+from sklearn.utils._response import _get_response_values
+from sklearn.utils.validation import _check_sample_weight, check_is_fitted
+from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
+
+from hidimstat.base_variable_importance import (
+    BaseVariableImportance,
+)
+
+
+def _grid_from_X(
+    X,
+    percentiles,
+    is_categorical,
+    grid_resolution,
+    custom_values,
+    statistical_resolution=False,
+):
+    """
+    Generate a grid of points based on the percentiles of X.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_target_features)
+        The data from which to generate the grid.
+    percentiles : a pair of float (p1, p2)
+        The lower and upper percentiles to use for the grid boundaries.
+        Must satisfy 0 <= p1 < p2 <= 1.
+    is_categorical : list of bool
+        For each feature, indicates whether it is categorical or not.
+        For categorical features, unique values are used instead of percentiles.
+    grid_resolution : int
+        Number of equally spaced points for the grid.
+        Must be greater than 1.
+ custom_values : dict + Mapping from column index of X to array-like of custom values + to use for that feature instead of the generated grid. + statistical_resolution : bool, default=False + If True, uses quantiles for grid points instead of equally spaced points + between percentile boundaries. + + Returns + ------- + values : list of 1d ndarrays + List containing the unique grid values for each feature. + Each array has length <= grid_resolution. + indexes : list of 1d ndarrays + For each feature, contains the indices mapping each sample in X + to its position in the grid. + + Notes + ----- + Based on scikit-learn's _grid_from_X implementation: + https://github.com/scikit-learn/scikit-learn/blob/c5497b7f7eacfaff061cf68e09bcd48aa93d4d6b/sklearn/inspection/_partial_dependence.py#L40 + """ + assert ( + isinstance(percentiles, Iterable) and len(percentiles) == 2 + ), "'percentiles' must be a sequence of 2 elements." + assert all( + 0 <= x <= 1 for x in percentiles + ), "'percentiles' values must be in [0, 1]." + assert ( + percentiles[0] < percentiles[1] + ), "percentiles[0] must be strictly less than percentiles[1]." + assert grid_resolution > 1, "'grid_resolution' must be strictly greater than 1." + + def _convert_custom_values(values): + # Convert custom types such that object types are always used for string arrays + dtype = object if any(isinstance(v, str) for v in values) else None + return np.asarray(values, dtype=dtype) + + custom_values = {k: _convert_custom_values(v) for k, v in custom_values.items()} + if any(v.ndim != 1 for v in custom_values.values()): + error_string = ", ".join( + f"Feature {k}: {v.ndim} dimensions" + for k, v in custom_values.items() + if v.ndim != 1 + ) + + raise ValueError( + "The custom grid for some features is not a one-dimensional array. " + f"{error_string}" + ) + + values = [] + indexes = [] + # TODO: we should handle missing values (i.e. `np.nan`) specifically and store them + # in a different Bunch attribute. + for feature_idx, is_cat in enumerate(is_categorical): + data = _safe_indexing(X, feature_idx, axis=1) + if feature_idx in custom_values: + # Use values in the custom range + axis = custom_values[feature_idx] + else: + try: + uniques = np.unique(data) + except TypeError as exc: + # `np.unique` will fail in the presence of `np.nan` and `str` categories + # due to sorting. Temporary, we reraise an error explaining the problem. + raise ValueError( + f"The column #{feature_idx} contains mixed data types. Finding unique " + "categories fail due to sorting. It usually means that the column " + "contains `np.nan` values together with `str` categories. Such use " + "case is not yet supported in scikit-learn." + ) from exc + + if is_cat or uniques.shape[0] < grid_resolution: + # Use the unique values either because: + # - feature has low resolution use unique values + # - feature is categorical + axis = uniques + else: + # create axis based on percentiles and grid resolution + if statistical_resolution: + axis = np.unique( + mquantiles( + data, + prob=np.linspace(0.0, 1.0, grid_resolution + 1), + axis=0, + ) + ) + else: + emp_percentiles = mquantiles(data, prob=percentiles, axis=0) + if np.allclose(emp_percentiles[0], emp_percentiles[1]): + raise ValueError( + "percentiles are too close to each other, " + "unable to build the grid. Please choose percentiles " + "that are further apart." 
+                    )
+                axis = np.linspace(
+                    emp_percentiles[0],
+                    emp_percentiles[1],
+                    num=grid_resolution,
+                    endpoint=True,
+                )
+        values.append(axis)
+        if not is_cat:
+            # -1 shifts np.digitize's bin indices so that they start at 0
+            digitize = np.digitize(data, axis, right=True) - 1
+        else:
+            # map string categories to integers so np.digitize can handle them
+            string_to_num = {s: i for i, s in enumerate(sorted(set(data)))}
+            digitize = np.digitize(
+                np.array([string_to_num[s] for s in data]),
+                [string_to_num[s] for s in axis],
+                right=True,
+            )
+        indexes.append(np.clip(digitize, 0, None))
+    return values, indexes
+
+
+def _partial_dependence_brute(est, grid, features, X, method, n_jobs):
+    """
+    Calculate partial dependence using the brute force method.
+    For each value in `grid`, replaces the target features with that value for all samples
+    in X, makes predictions, and averages them. This computes the mean model response
+    across the data distribution for each grid point.
+
+    Parameters
+    ----------
+    est : BaseEstimator
+        A fitted estimator implementing predict, predict_proba, or decision_function.
+        Multioutput-multiclass classifiers are not supported.
+    grid : list of 1d arrays
+        List containing grid values for each target feature. Each array contains the
+        values where partial dependence will be evaluated.
+    features : array-like of int
+        Indices of the features for which to compute partial dependence.
+    X : array-like of shape (n_samples, n_features)
+        Training data used to compute predictions.
+    method : {'auto', 'predict_proba', 'decision_function'}, default='auto'
+        Method to get model predictions:
+        - 'auto': uses predict_proba if available, otherwise decision_function
+        - 'predict_proba': uses predict_proba
+        - 'decision_function': uses decision_function
+        For regressors, always uses predict.
+    n_jobs : int
+        Number of parallel jobs to run.
+
+    Returns
+    -------
+    list of arrays
+        List containing arrays of predictions for each feature, averaged over X.
+        For each feature:
+        - Shape (n_grid_points, n_samples) for regression/binary classification
+        - Shape (n_grid_points, n_samples, n_classes) for multiclass
+    """
+    if method == "auto":
+        method = (
+            "predict" if is_regressor(est) else ["predict_proba", "decision_function"]
+        )
+    predictions = Parallel(n_jobs=n_jobs)(
+        delayed(_joblib_get_predictions)(variable, values, X, est, method)
+        for variable, values in tqdm(zip(features, grid))
+    )
+    return predictions
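+
+
+# A minimal sketch of how the two helpers above combine (illustrative only;
+# `model` is assumed to be a fitted regressor and `X` a float 2d array):
+#
+#     values, _ = _grid_from_X(X[:, [0]], (0.05, 0.95), [False], 20, {})
+#     preds = _partial_dependence_brute(model, values, [0], X.copy(), "auto", 1)
+#     # preds[0] stacks one prediction vector per grid value; averaging over
+#     # the samples axis yields the PDP curve, the columns are the ICE curves.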
+
+
+def _joblib_get_predictions(variable, values, X, est, method):
+    """
+    Helper function to get predictions for a single feature, for parallel processing.
+
+    Parameters
+    ----------
+    variable : int
+        Index of the feature for which to get predictions.
+    values : array-like
+        Grid values for the target feature.
+    X : array-like of shape (n_samples, n_features)
+        Training data used to compute predictions.
+    est : BaseEstimator
+        A fitted estimator implementing predict, predict_proba, or decision_function.
+    method : str or list of str
+        Method to get model predictions: 'predict', 'predict_proba', or 'decision_function'.
+
+    Returns
+    -------
+    list of array
+        List of predictions for each grid point, each with shape:
+        - (n_samples,) for regression
+        - (n_samples, 1) for binary classification
+        - (n_samples, n_classes) for multiclass
+    """
+    predictions = []
+    for new_values in values:
+        _safe_assign(X, new_values, column_indexer=variable)
+
+        # Note: predictions is of shape
+        # (n_points,) for non-multioutput regressors
+        # (n_points, n_tasks) for multioutput regressors
+        # (n_points, 1) for the regressors in cross_decomposition
+        # (n_points, 1) for binary classification (positive class already selected)
+        # (n_points, n_classes) for multiclass classification
+        pred, _ = _get_response_values(est, X, response_method=method)
+        if len(pred.shape) > 1 and pred.shape[1] > 1:
+            # for multiclass outputs, keep the largest class response so that a
+            # single value is returned per sample and grid point
+            pred = np.max(pred, axis=1)
+        predictions.append(np.squeeze(pred))
+    return predictions
+
+
+class PartialDependencePlot(BaseVariableImportance):
+    """
+    Partial Dependence Plot (PDP) :footcite:t:`friedman2001greedy` for analyzing
+    feature effects on model predictions. It is built on individual conditional
+    expectation (ICE) curves :footcite:t:`goldstein2015peeking`.
+    PDP shows the average model prediction across different values of target features,
+    while marginalizing over the values of all other features. It helps understand
+    how features affect predictions on average. ICE curves show predictions for
+    individual samples as the feature value changes.
+    Feature importance scores are computed following :footcite:t:`greenwell2018simple`:
+
+    - For continuous features: standard deviation of the PDP curve
+    - For categorical features: range of the PDP values divided by 4
+
+    Parameters
+    ----------
+    estimator : BaseEstimator
+        A fitted estimator object implementing predict, predict_proba, or
+        decision_function. Must be a regressor or binary/multiclass classifier.
+        Multioutput-multiclass classifiers are not supported.
+    features : array-like of {int, str}, default=None
+        Features for which to compute partial dependence. Can be:
+        - Single feature: int or str
+        - Multiple features: list of int or str
+        Feature indices or names must match training data.
+    method : {'auto', 'predict_proba', 'decision_function'}, default='auto'
+        Method for getting predictions:
+        - 'auto': tries predict_proba first, falls back to decision_function
+        - 'predict_proba': uses predicted probabilities
+        - 'decision_function': uses decision function scores
+        Ignored for regressors, which always use predict.
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights for computing weighted averages of predictions.
+        If None, samples are weighted equally.
+    categorical_features : array-like, default=None
+        Specifies categorical features. Can be:
+        - None: no categorical features
+        - boolean array: mask indicating categorical features
+        - array of int/str: indices/names of categorical features
+    feature_names : array-like of str, default=None
+        Names of features in the training data.
+        If None, uses array indices for NumPy arrays or column names for pandas.
+    percentiles : tuple of float, default=(0.05, 0.95)
+        Lower and upper percentiles for grid boundaries.
+        Used for continuous features if custom_values is not provided.
+        Must be in [0, 1].
+    grid_resolution : int, default=100
+        Number of grid points for continuous features.
+        Higher values give more granular curves but increase computation time.
+        Ignored if custom_values is provided.
+    custom_values : dict, default=None
+        Custom grid values for features.
Dictionary mapping feature index/name + to array of values to evaluate. + Overrides percentiles and grid_resolution for specified features. + statistical_resolution : bool, default=False + If True, uses quantile-based grid points instead of evenly spaced points. + Can better capture feature distribution. + n_jobs : int, default=1 + Number of CPU cores to use for parallel processing. + -1 means using all processors. + + Attributes + ---------- + importances_ : ndarray of shape (n_features,) + Computed feature importance scores based on PDP variance + ices_ : list of arrays + Individual Conditional Expectation curves for each feature + values_ : list of arrays + Grid values used for each feature + + See Also + -------- + sklearn.inspection.partial_dependence : Similar functionality in scikit-learn + + References + ---------- + .. footbibliography:: + """ + + def __init__( + self, + estimator, + features=None, + method: str = "auto", + sample_weight=None, + categorical_features=None, + feature_names=None, + percentiles=(0.05, 0.95), + grid_resolution=100, + custom_values=None, + statistical_resolution=False, + n_jobs: int = 1, + ): + super().__init__() + check_is_fitted(estimator) + self.estimator = estimator + if not (is_classifier(self.estimator) or is_regressor(self.estimator)): + raise ValueError("'estimator' must be a fitted regressor or classifier.") + + if is_classifier(self.estimator) and isinstance( + self.estimator.classes_[0], np.ndarray + ): + raise ValueError("Multiclass-multioutput estimators are not supported") + + if is_regressor(self.estimator) and (method != "auto" and method != "predict"): + raise ValueError( + "The method parameter is ignored for regressors and " + "must be 'auto' or 'predict'." + ) + self.features = features + self.method = method + self.sample_weight = sample_weight + self.categorical_features = categorical_features + self.feature_names = feature_names + self.percentiles = percentiles + self.grid_resolution = grid_resolution + self.custom_values = custom_values + self.statistical_resolution = statistical_resolution + self.n_jobs = n_jobs + + def fit(self, X=None, y=None): + """ + Fits the PartialDependencePlot model. This method has no effect as PDP + only needs a fitted estimator. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + (Not used) Training data. Not used, present here for API consistency. + y : array-like of shape (n_samples,) + (Not used) Target values. Not used, present here for API consistency. + + Returns + ------- + self : object + Returns the instance itself. + """ + if X is not None: + warnings.warn("X won't be used") + if y is not None: + warnings.warn("y won't be used") + return self + + def _set_environment(self, X_): + """ + Set up environment variables needed for importance calculation. + + Parameters + ---------- + X_ : array-like of shape (n_samples, n_features) + Input data on which to compute feature importance. + + Returns + ------- + tuple + Contains: + - X_subset : array-like, subset of X_ with only target features + - custom_values_for_X_subset : dict, custom values for target features + + Raises + ------ + ValueError + If features indices are negative. + ValueError + If categorical_features is empty list. + ValueError + If categorical_features boolean mask has wrong length. + ValueError + If categorical_features has invalid dtype. + ValueError + If features contain integer data. 
+ """ + if self.sample_weight is not None: + self.sample_weight = _check_sample_weight(self.sample_weight, X_) + + if _determine_key_type(self.features, accept_slice=False) == "int": + # _get_column_indices() supports negative indexing. Here, we limit + # the indexing to be positive. The upper bound will be checked + # by _get_column_indices() + if np.any(np.less(self.features, 0)): + raise ValueError( + "all features must be in [0, {}]".format(X_.shape[1] - 1) + ) + + if self.features is None: + self.features = list(range(X_.shape[1])) + self.features_indices = np.asarray( + _get_column_indices(X_, self.features), dtype=np.intp, order="C" + ).ravel() + + self.feature_names = _check_feature_names(X_, self.feature_names) + + n_features = X_.shape[1] + if self.categorical_features is None: + self.is_categorical = [False] * len(self.features_indices) + else: + categorical_features = np.asarray(self.categorical_features) + if categorical_features.size == 0: + raise ValueError( + "Passing an empty list (`[]`) to `categorical_features` is not " + "supported. Use `None` instead to indicate that there are no " + "categorical features." + ) + if categorical_features.dtype.kind == "b": + # categorical features provided as a list of boolean + if categorical_features.size != n_features: + raise ValueError( + "When `categorical_features` is a boolean array-like, " + "the array should be of shape (n_features,). Got " + f"{categorical_features.size} elements while `X` contains " + f"{n_features} features." + ) + self.is_categorical = [ + categorical_features[idx] for idx in self.features_indices + ] + elif categorical_features.dtype.kind in ("i", "O", "U"): + # categorical features provided as a list of indices or feature names + categorical_features_idx = [ + _get_feature_index(cat, feature_names=self.feature_names) + for cat in categorical_features + ] + self.is_categorical = [ + idx in categorical_features_idx for idx in self.features_indices + ] + else: + raise ValueError( + "Expected `categorical_features` to be an array-like of boolean," + f" integer, or string. Got {categorical_features.dtype} instead." + ) + + custom_values = self.custom_values or {} + if isinstance(self.features, (str, int)): + self.features = [self.features] + + for feature_idx, feature, is_cat in zip( + self.features_indices, self.features, self.is_categorical + ): + if is_cat: + continue + + if _safe_indexing(X_, feature_idx, axis=1).dtype.kind in "iu": + # TODO(1.9): raise a ValueError instead. + raise ValueError( + f"The column {feature!r} contains integer data. Partial " + "dependence plots are not supported for integer data: this " + "can lead to implicit roun[val.shape[0] for val in self.values_]ding with NumPy arrays or even errors " + "with newer pandas versions. Please convert numerical features" + "to floating point dtypes ahead of time to avoid problems. " + ) + + X_subset = _safe_indexing(X_, self.features_indices, axis=1) + + custom_values_for_X_subset = { + index: custom_values.get(feature) + for index, feature in enumerate(self.features) + if feature in custom_values + } + return X_subset, custom_values_for_X_subset + + def importance(self, X, y=None): + """ + Calculate partial dependence importance scores for each feature. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Data to compute partial dependence on. Must have same features + as training data. + y : array-like of shape (n_samples,) + (not used) Target values. Kept for API compatibility. 
+
+        Returns
+        -------
+        ndarray of shape (n_features,)
+            Importance scores for each feature based on partial dependence.
+            Higher values indicate greater importance.
+
+        Raises
+        ------
+        ValueError
+            If features indices are negative.
+        ValueError
+            If categorical_features is an empty list.
+        ValueError
+            If the categorical_features boolean mask has the wrong length.
+        ValueError
+            If categorical_features has an invalid dtype.
+        ValueError
+            If features contain integer data.
+        """
+        assert self.importances_ is None, "Partial Dependence Plot already fitted"
+
+        if y is not None:
+            warnings.warn("y won't be used")
+        # Use check_array only on lists and other non-array-likes / sparse. Do not
+        # convert DataFrame into a NumPy array.
+        if not (hasattr(X, "__array__") or sparse.issparse(X)):
+            # TODO: pass ensure_all_finite="allow-nan" once scikit-learn >= 1.6 is
+            # required; the option is omitted here for backward compatibility.
+            X_ = deepcopy(check_array(X, dtype=object))
+        else:
+            X_ = deepcopy(X)
+        X_subset, custom_values_for_X_subset = self._set_environment(X_)
+
+        self.values_, _ = _grid_from_X(
+            X_subset,
+            self.percentiles,
+            self.is_categorical,
+            self.grid_resolution,
+            custom_values_for_X_subset,
+            self.statistical_resolution,
+        )
+
+        self.ices_ = _partial_dependence_brute(
+            self.estimator,
+            self.values_,
+            self.features_indices,
+            X_,
+            self.method,
+            self.n_jobs,
+        )
+
+        averaged_predictions_continuous = []
+        averaged_predictions_categorical = []
+        # average over samples
+        for index, ice in enumerate(self.ices_):
+            if not self.is_categorical[index]:
+                averaged_predictions_continuous.append(
+                    np.average(ice, axis=1, weights=self.sample_weight)
+                )
+            else:
+                averaged_predictions_categorical.append(
+                    np.average(ice, axis=1, weights=self.sample_weight)
+                )
+
+        # compute importance from equation 4 of greenwell2018simple
+        self.importances_ = np.zeros_like(self.is_categorical, dtype=float)
+        # importance for continuous features: standard deviation of the PDP curve
+        if len(averaged_predictions_continuous) > 0:
+            importance_continuous = []
+            for averaged_prediction in averaged_predictions_continuous:
+                importance_continuous.append(
+                    np.sqrt(
+                        np.sum(
+                            (averaged_prediction - np.mean(averaged_prediction)) ** 2
+                        )
+                        / (len(averaged_prediction) - 1)
+                    )
+                )
+            self.importances_[np.logical_not(self.is_categorical)] = (
+                importance_continuous
+            )
+        # importance for categorical features: range of the PDP curve divided by 4
+        if len(averaged_predictions_categorical) > 0:
+            importance_categories = []
+            for averaged_prediction in averaged_predictions_categorical:
+                importance_categories.append(
+                    (np.max(averaged_prediction) - np.min(averaged_prediction)) / 4
+                )
+            self.importances_[self.is_categorical] = importance_categories
+        self.pvalues_ = None
+
+        return self.importances_
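+
+    # A minimal usage sketch (illustrative only; it assumes `model` is a fitted
+    # estimator and `X` the corresponding design matrix):
+    #
+    #     pdp = PartialDependencePlot(model, features=[0, 1])
+    #     scores = pdp.importance(X)
+    #
+    # For continuous features `scores` holds the standard deviation of the PDP
+    # curve; for categorical features it holds (max - min) / 4 of the averaged
+    # curve, following Greenwell et al. (2018).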
+ """ + if y is not None: + warnings.warn("y won't be used") + if cv is not None: + warnings.warn("cv won't be used") + self.fit() + return self.importance(X) + + def plot( + self, + feature_id, + ax=None, + X=None, + n_bins=5, + percentage_ice=1.0, + random_state=None, + **kwargs, + ): + """ + Plot partial dependence and ICE curves for a given feature. + + Parameters + ---------- + feature_id : int + Index of the feature to plot from the features used to compute PDP. + ax : matplotlib.axes.Axes, default=None + Pre-existing axes for the plot. If None, generates new figure and axes. + X : array-like, default=None + Training data used to compute the quantiles and feature distribution. + If provided, adds rugplot and quantile indicators to visualization. + nbins : int, default=5 + Number of bins/quantiles to show in the plot when X is provided. + percentage_ice : float, default=1.0 + Proportion of ICE curves to plot, between 0 and 1. + Lower values reduce visual clutter. + random_state : int or RandomState, default=None + Controls random sampling of ICE curves when percentage_ice < 1. + **kwargs : dict + Additional keyword arguments passed to plot function. + + Returns + ------- + matplotlib.axes.Axes + The axes containing the plot. + + Raises + ------ + Exception + If seaborn is not installed. + + Notes + ----- + For continuous features: + - Blue lines show individual ICE curves + - Black line shows averaged PDP curve + - Rug plot shows data distribution + - Top axis shows percentile values + For categorical features: + - Box plot shows distribution of predictions per category + """ + try: + import seaborn as sns + import matplotlib.pyplot as plt + except ImportError: + raise Exception("You need to install seabor for using this functionnality") + assert ( + percentage_ice <= 1.0 and percentage_ice >= 0.0 + ), "percentage of ice need to be 0 and 1" + self._check_importance() + + rng = check_random_state(random_state) + # subsample ice + ice_lines_idx = rng.choice( + len(self.ices_[feature_id][0]), + int(len(self.ices_[feature_id][0]) * percentage_ice), + replace=False, + ) + # centers = np.array(self.values_[1:] + self.values_[:-1]) / 2 + if ax is None: + fig, ax = plt.subplots() + # add the percentage of the quantiles distributions + if not self.is_categorical[feature_id]: + # plot ice + ax.plot( + np.array(self.values_[feature_id]), + np.array(self.ices_[feature_id])[:, ice_lines_idx], + color="lightblue", + alpha=0.5, + linewidth=0.5, + ) + # plot pdp + ax.plot( + self.values_[feature_id], + np.average( + self.ices_[feature_id], + axis=1, + weights=self.sample_weight, + ), + color="black", + **kwargs, + ) + if X is not None: + data = (_safe_indexing(X, self.features_indices[feature_id], axis=1),) + _ax_quantiles( + ax, + np.unique( + np.quantile(data, np.linspace(0, 1, n_bins), method="lower") + ), + ) + # add distribution of value + sns.rugplot(data, ax=ax, alpha=0.2, legend=False) + ax.set_xlabel(self.feature_names[feature_id]) + else: + ax.boxplot( + self.ices_[feature_id], + positions=np.arange(len(self.values_[feature_id])), + ) + ax.set_xticks(np.arange(len(self.values_[feature_id]))) + ax.set_xticklabels( + self.values_[feature_id], rotation=90, fontdict={"fontsize": 8} + ) + if X is not None: + # add an histogram of the number of element by categoris + # in bottom of the figure + ax_histx = ax.inset_axes([0, -0.45, 1, 0.2], sharex=ax) + ax_histx.hist( + _safe_indexing(X, self.features_indices[feature_id], axis=1), + histtype="bar", + bins=np.arange(len(self.values_[feature_id]) + 
+
+
+def _ax_quantiles(ax, quantiles, twin="x"):
+    """
+    Add quantile percentage labels on a twin axis.
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+        The matplotlib axis to add quantile labels to.
+    quantiles : array-like
+        The quantile values to label.
+    twin : {'x', 'y'}, default='x'
+        Which axis to add the twin axis and labels to:
+        - 'x': Add labels on the top x-axis
+        - 'y': Add labels on the right y-axis
+
+    Raises
+    ------
+    ValueError
+        If twin is not 'x' or 'y'.
+
+    Notes
+    -----
+    Creates a twin axis with percentage labels (0-100%) corresponding to the
+    quantile values. Useful for showing the distribution of data alongside
+    the actual values.
+    """
+    assert twin in ("x", "y"), "'twin' should be one of 'x' or 'y'."
+
+    # Duplicate the 'opposite' axis so we can define a distinct set of ticks for the
+    # desired axis (`twin`).
+    ax_mod = ax.twiny() if twin == "x" else ax.twinx()
+
+    # Set the new axis' ticks for the desired axis.
+    getattr(ax_mod, "set_{twin}ticks".format(twin=twin))(quantiles)
+
+    # Calculate tick label percentage values for each quantile (bin edge).
+    percentages = (
+        100 * np.arange(len(quantiles), dtype=np.float64) / (len(quantiles) - 1)
+    )
+
+    # If there is a fractional part, add a decimal place to show (part of) it.
+    fractional = (~np.isclose(percentages % 1, 0)).astype("int8")
+
+    # Set the corresponding tick labels.
+    getattr(ax_mod, "set_{twin}ticklabels".format(twin=twin))(
+        [
+            "{0:0.{1}f}%".format(percent, format_fraction)
+            for percent, format_fraction in zip(percentages, fractional)
+        ],
+        color="#545454",
+        fontsize=7,
+    )
+    getattr(ax_mod, "set_{twin}lim".format(twin=twin))(
+        getattr(ax, "get_{twin}lim".format(twin=twin))()
+    )
diff --git a/test/baseline/test_plot_pdp.png b/test/baseline/test_plot_pdp.png
new file mode 100644
index 000000000..7f70ef61b
Binary files /dev/null and b/test/baseline/test_plot_pdp.png differ
diff --git a/test/baseline/test_plot_pdp_not_fig.png b/test/baseline/test_plot_pdp_not_fig.png
new file mode 100644
index 000000000..defe47329
Binary files /dev/null and b/test/baseline/test_plot_pdp_not_fig.png differ
diff --git a/test/marginal/test_partial_dependence_plot.py b/test/marginal/test_partial_dependence_plot.py
new file mode 100644
index 000000000..c04980822
--- /dev/null
+++ b/test/marginal/test_partial_dependence_plot.py
@@ -0,0 +1,652 @@
+from copy import deepcopy
+import sys
+
+import numpy as np
+import pytest
+from sklearn.exceptions import NotFittedError
+from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeClassifier
+from sklearn.cluster import FeatureAgglomeration
+from sklearn.multioutput import MultiOutputClassifier
+
+from hidimstat import PDP
+
+
+def seaborn_installed():
+    try:
+        import seaborn
+
+        return True
+    except ImportError:
+        return False
+
+
+def configure_linear_categorial_pdp(X, y):
+    """
+    Configure a Partial Dependence Plot (PDP) model with linear regression
+    for feature importance analysis.
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Input data matrix where each column represents a feature
+        and each row a sample.
+    y : array-like of shape (n_samples,)
+        Target variable array.
+ Returns + ------- + importance : array-like + Array containing importance scores for each feature. + Higher values indicate greater feature importance in predicting + the target variable. + Notes + ----- + The function performs the following steps: + 1. Fits a linear regression model on training data + 2. Configures PDP with linear regression + 3. Calculates feature importance using the test set + The PDP method is a marginal methods scoring with linear + regression as the base model. + """ + # create and fit a linear regression model on the training set + regression_model = LinearRegression() + regression_model.fit(X, y) + + # instantiate CPI model with linear regression imputer + pdp = PDP( + estimator=regression_model, + n_jobs=1, + ) + # calculate feature importance using the test set + importance = pdp.importance(X) + return np.array(importance) + + +parameter_exact = [ + ("HiDim", 150, 200, 1, 0.0, 42, 1.0, np.inf, 0.0), + ("HiDim with noise", 150, 200, 1, 0.0, 42, 1.0, 10.0, 0.0), + ("HiDim with correlated noise", 150, 200, 1, 0.0, 42, 1.0, 10.0, 0.5), + ("HiDim with correlated features", 150, 200, 1, 0.8, 42, 1.0, np.inf, 0.0), + ("HiDim with high level noise", 150, 200, 10, 0.2, 42, 1.0, 0.5, 0.0), +] + + +@pytest.mark.parametrize( + "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial", + zip(*(list(zip(*parameter_exact))[1:])), + ids=list(zip(*parameter_exact))[0], +) +def test_pdp_linear_data_exact(data_generator): + """Tests the method on linear cases with noise and correlation""" + X, y, important_features, _ = data_generator + + importance = configure_linear_categorial_pdp(X, y) + # check that importance scores are defined for each feature + assert importance.shape == (X.shape[1],) + # check that important features have the highest importance scores + assert np.all([int(i) in important_features for i in np.argsort(importance)[-1:]]) + + +parameter_bad_detection = [ + ("HiDim with high correlated features", 150, 200, 1, 1.0, 42, 1.0, 5.0, 0.0), +] + + +@pytest.mark.parametrize( + "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial", + zip(*(list(zip(*parameter_bad_detection))[1:])), + ids=list(zip(*parameter_bad_detection))[0], +) +def test_pdp_linear_data_fail(data_generator): + """Tests the method on linear cases with noise and correlation""" + X, y, important_features, _ = data_generator + + importance = configure_linear_categorial_pdp(X, y) + # check that importance scores are defined for each feature + assert importance.shape == (X.shape[1],) + # check that important features have the highest importance scores + assert np.any( + [int(i) not in important_features for i in np.argsort(importance)[-1:]] + ) + + +@pytest.mark.parametrize( + "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial", + zip(*(list(zip(*parameter_exact))[1:])), + ids=list(zip(*parameter_exact))[0], +) +def test_pdp_classication(data_generator): + """Test PDP for a classification problem""" + X, y, important_features, not_important_features = data_generator + # Create categories + y_clf = deepcopy(y) + y_clf[np.where(y > 2)] = 0 + y_clf[np.where(np.logical_and(y <= 2, y > 0))] = 1 + y_clf[np.where(np.logical_and(y <= 0, y > -2))] = 2 + y_clf[np.where(y <= -2)] = 3 + y_clf = np.array(y_clf, dtype=int) + + # Create and fit a logistic regression model on the training set + logistic_model = LogisticRegression() + logistic_model.fit(X, y_clf) + + pdp = PDP( + estimator=logistic_model, + n_jobs=1, + 
method="predict_proba", + ) + importance = pdp.fit_importance(X) + + # Check that importance scores are defined for each feature + assert importance.shape == (X.shape[1],) + # Check that important features have higher mean importance scores + assert ( + importance[important_features].mean() + > importance[not_important_features].mean() + ) + + +# test plot of the pdp +@pytest.mark.skipif(not seaborn_installed(), reason="seaborn is not installed") +@pytest.mark.skipif( + sys.version_info < (3, 12), + reason="Don't test retrocompatibility for the generation of figures.", +) +@pytest.mark.mpl_image_compare +def test_plot_pdp(): + """Test simple plot of pdp""" + import matplotlib.pyplot as plt + + seed = 42 + n_samples = 150 + rng = np.random.default_rng(seed) + X_cont = rng.random((n_samples, 2)) + X_cat = np.concatenate( + [ + rng.integers(low=0, high=3, size=(n_samples, 1)), + rng.integers(low=0, high=5, size=(n_samples, 1)), + ], + axis=1, + ) + X = np.hstack([X_cont, X_cat]) + categories_features = np.ones(X.shape[1], dtype=bool) + categories_features[: X_cont.shape[1]] = False + y = rng.random((n_samples, 1)) + fitted_model = LinearRegression().fit(X, y) + + pdp = PDP(estimator=fitted_model, categorical_features=categories_features) + pdp.fit_importance(X) + + fig, axs = plt.subplots(ncols=2, nrows=2, figsize=(9, 8), constrained_layout=True) + for i in range(4): + pdp.plot(feature_id=i, X=X, ax=axs.ravel()[i]) + return fig + + +@pytest.mark.skipif(not seaborn_installed(), reason="seaborn is not installed") +@pytest.mark.skipif( + sys.version_info < (3, 12), + reason="Don't test retrocompatibility for the generation of figures.", +) +@pytest.mark.mpl_image_compare +def test_plot_pdp_not_fig(): + """Test plot when the figure is not define before""" + seed = 42 + n_samples = 150 + rng = np.random.default_rng(seed) + X_cont = rng.random((n_samples, 2)) + X_cat = np.concatenate( + [ + rng.integers(low=0, high=3, size=(n_samples, 1)), + rng.integers(low=0, high=5, size=(n_samples, 1)), + ], + axis=1, + ) + X = np.hstack([X_cont, X_cat]) + categories_features = np.ones(X.shape[1], dtype=bool) + categories_features[: X_cont.shape[1]] = False + y = rng.random((n_samples, 1)) + fitted_model = LinearRegression().fit(X, y) + + pdp = PDP(estimator=fitted_model, categorical_features=categories_features) + pdp.fit_importance(X) + + ax = pdp.plot(feature_id=0, X=X) + return ax.get_figure() + + +############################################################################## +@pytest.mark.parametrize( + "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial", + [(150, 200, 1, 0.0, 42, 1.0, 0.0, 0.0)], + ids=["default data"], +) +class TestPDPClass: + """Test the element of the class""" + + def test_pdp_init(self, data_generator): + """Test PDP initialization""" + X, y, _, _ = data_generator + fitted_model = LinearRegression().fit(X, y) + pdp = PDP( + estimator=fitted_model, + method="predict", + ) + assert pdp.n_jobs == 1 + assert pdp.method == "predict" + + def test_pdp_importance(self, data_generator): + """Test importance function PDP""" + X, y, _, _ = data_generator + fitted_model = LinearRegression().fit(X, y) + pdp = PDP( + estimator=fitted_model, + ) + # Test importance + pdp.importance(X.tolist()) + assert len(pdp.importances_) > 0 + assert len(pdp.ices_) > 0 + assert pdp.pvalues_ is None + + def test_pdp_resolution_stat(self, data_generator): + """Test resolution statistic PDP""" + X, y, _, _ = data_generator + fitted_model = LinearRegression().fit(X, y) + pdp = 
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_custom_values(self, data_generator):
+        """Test custom values"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, custom_values={1: [0.1, 0.2, 0.3]})
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_sample_weight(self, data_generator):
+        """Test sample weight"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, sample_weight=np.random.rand(150))
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_categories(self, data_generator):
+        """Test categorical features given as a boolean mask"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(
+            estimator=fitted_model, categorical_features=np.ones(200, dtype=bool)
+        )
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_categories_2(self, data_generator):
+        """Test categorical features given as integer indices"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, categorical_features=range(200))
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_features(self, data_generator):
+        """Test restricting the analysis to a single feature"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, features=0)
+        # Test importance
+        pdp.importance(X)
+        assert len(pdp.importances_) > 0
+        assert len(pdp.ices_) > 0
+        assert pdp.pvalues_ is None
+
+    def test_pdp_categorical(
+        self,
+        n_samples,
+        n_features,
+        support_size,
+        rho,
+        seed,
+        value,
+        signal_noise_ratio,
+        rho_serial,
+    ):
+        """Test PDP with categorical variables"""
+        rng = np.random.default_rng(seed)
+        X_cont = rng.random((n_samples, 2))
+        X_cat = np.concatenate(
+            [
+                rng.integers(low=0, high=3, size=(n_samples, 1)),
+                rng.integers(low=0, high=5, size=(n_samples, 1)),
+            ],
+            axis=1,
+        )
+        X = np.hstack([X_cont, X_cat])
+        categories_features = np.ones(X.shape[1], dtype=bool)
+        categories_features[: X_cont.shape[1]] = False
+        y = rng.random((n_samples, 1))
+        fitted_model = LinearRegression().fit(X, y)
+
+        pdp = PDP(estimator=fitted_model, categorical_features=categories_features)
+
+        importances = pdp.fit_importance(X)
+        assert len(importances) == 4
+        assert np.all(importances < 0.1)  # not informative: worse than a dummy model
+
+
+##############################################################################
+@pytest.mark.parametrize(
+    "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial",
+    [(150, 200, 10, 0.0, 42, 1.0, 0.0, 0.0)],
+    ids=["default data"],
+)
+class TestPDPExceptions:
+    """Test class for PDP exceptions"""
+
+    def test_unfitted_estimator(self, data_generator):
+        """Test when using an unfitted estimator"""
+        with pytest.raises(NotFittedError):
+            PDP(
+                estimator=LinearRegression(),
+                method="predict",
+            )
+
+    def test_unknown_wrong_method_regression(self, data_generator):
+        """Test when a wrong prediction method is provided"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
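+        # regressors only accept method="auto" or "predict";
+        # asking for "predict_proba" must raise a ValueError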
+
+        with pytest.raises(
+            ValueError,
+            match="The method parameter is ignored for regressors and "
+            "must be 'auto' or 'predict'.",
+        ):
+            PDP(
+                estimator=fitted_model,
+                method="predict_proba",
+            )
+
+    def test_unknown_predict_method_regression(self, data_generator):
+        """Test when the prediction method is not valid for a regressor"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+
+        with pytest.raises(
+            ValueError,
+            match="The method parameter is ignored for regressors and must be 'auto' or 'predict'.",
+        ):
+            PDP(
+                estimator=fitted_model,
+                method="unknown method",
+            )
+
+    def test_unknown_predict_method_classification(self, data_generator):
+        """Test when an unknown prediction method is provided"""
+        X, y, _, _ = data_generator
+        y_clf = deepcopy(y)
+        y_clf[np.where(y > 2)] = 0
+        y_clf[np.where(np.logical_and(y <= 2, y > 0))] = 1
+        y_clf[np.where(np.logical_and(y <= 0, y > -2))] = 2
+        y_clf[np.where(y <= -2)] = 3
+        y_clf = np.array(y_clf, dtype=int)
+        fitted_model = RidgeClassifier().fit(X, y_clf)
+        pdp = PDP(
+            estimator=fitted_model,
+            method="unknown method",
+        )
+
+        with pytest.raises(
+            AttributeError,
+            match="RidgeClassifier has none of the following attributes: unknown method",
+        ):
+            pdp.importance(X)
+
+    def test_unknown_predict_method_multioutput(self, data_generator):
+        """Test that multiclass-multioutput estimators are rejected"""
+        X, y, _, _ = data_generator
+        y_clf = deepcopy(y)
+        y_clf[np.where(y > 2)] = 0
+        y_clf[np.where(np.logical_and(y <= 2, y > 0))] = 1
+        y_clf[np.where(np.logical_and(y <= 0, y > -2))] = 2
+        y_clf[np.where(y <= -2)] = 3
+        y_clf = np.array(y_clf, dtype=int)
+        fitted_model = MultiOutputClassifier(RidgeClassifier()).fit(
+            X, np.array([y_clf, y_clf]).T
+        )
+
+        with pytest.raises(
+            ValueError,
+            match="Multiclass-multioutput estimators are not supported",
+        ):
+            PDP(
+                estimator=fitted_model,
+            )
+
+    def test_wrong_estimator(self, data_generator):
+        """Test with a wrong type of estimator"""
+        X, y, _, _ = data_generator
+        fitted_model = FeatureAgglomeration().fit(X, y)
+
+        with pytest.raises(
+            ValueError, match="'estimator' must be a fitted regressor or classifier."
+        ):
+            PDP(estimator=fitted_model)
+
+    def test_no_importance_double(self, data_generator):
+        """Test that importance cannot be computed twice"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model)
+        pdp.fit_importance(X=X)
+
+        with pytest.raises(
+            AssertionError, match="Partial Dependance Plot already fitted"
+        ):
+            pdp.importance(X=X)
+
+    def test_fit_warning(self, data_generator):
+        """Test the warnings raised by the fit method"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model)
+
+        with pytest.warns(match="y won't be used"):
+            pdp.fit(y=y)
+
+        with pytest.warns(match="X won't be used"):
+            pdp.fit(X=X)
+
+        with pytest.warns(match="X won't be used"):
+            with pytest.warns(match="y won't be used"):
+                pdp.fit(X=X, y=y)
+
+    def test_fit_importance_warning(self, data_generator):
+        """Test the warnings raised by the importance methods"""
+        X, y, _, _ = data_generator
+        pdp = PDP(estimator=LinearRegression().fit(X, y))
+        with pytest.warns(match="y won't be used"):
+            pdp.importance(X=X, y=y)
+        pdp = PDP(estimator=LinearRegression().fit(X, y))
+        with pytest.warns(match="y won't be used"):
+            pdp.fit_importance(X=X, y=y)
+        pdp = PDP(estimator=LinearRegression().fit(X, y))
+        with pytest.warns(match="y won't be used"):
+            with pytest.warns(match="cv won't be used"):
+                pdp.fit_importance(X=X, y=y, cv=[])
+
+    def test_percentage_sequence(self, data_generator):
+        """Test the importance method when 'percentiles' is a sequence of more
+        than 2 elements"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, percentiles=[2, 3, 4])
+
+        with pytest.raises(
+            AssertionError, match="'percentiles' must be a sequence of 2 elements."
+        ):
+            pdp.importance(X=X)
+
+    def test_percentage_no_0_1(self, data_generator):
+        """Test the importance method with percentile values outside [0, 1]"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, percentiles=[2, 3])
+
+        with pytest.raises(
+            AssertionError, match=r"'percentiles' values must be in \[0, 1\]."
+        ):
+            pdp.importance(X=X)
+
+    def test_percentage_no_right_order(self, data_generator):
+        """Test the importance method with percentiles in the wrong order"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, percentiles=[0.7, 0.3])
+
+        with pytest.raises(
+            AssertionError,
+            match=r"percentiles\[0\] must be strictly less than percentiles\[1\].",
+        ):
+            pdp.importance(X=X)
+
+    def test_feature_wrong(self, data_generator):
+        """Test the importance method with out-of-range feature indices"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, features=[0, 1, 3, -10])
+
+        with pytest.raises(ValueError, match=r"all features must be in \[0, "):
+            pdp.importance(X=X)
+
+    def test_empty_categorical_features(self, data_generator):
+        """Test an empty list for categorical features"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, categorical_features=[])
+
+        with pytest.raises(
+            ValueError,
+            match="supported. Use `None` instead to indicate that there are no ",
+        ):
+            pdp.importance(X=X)
+
+    def test_empty_categorical_features_binary(self, data_generator):
+        """Test a boolean categorical mask that does not cover every feature"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(
+            estimator=fitted_model, categorical_features=np.ones(50, dtype=bool)
+        )
+
+        with pytest.raises(
+            ValueError,
+            match="When `categorical_features` is a boolean array-like, ",
+        ):
+            pdp.importance(X=X)
+
+    def test_empty_categorical_features_wrong_type(self, data_generator):
+        """Test categorical features of a wrong type"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(
+            estimator=fitted_model, categorical_features=np.ones(200, dtype=float)
+        )
+
+        with pytest.raises(
+            ValueError,
+            match="Expected `categorical_features` to be an array-like of boolean, ",
+        ):
+            pdp.importance(X=X)
+
+    def test_grid(self, data_generator):
+        """Test the importance method with a wrong grid resolution"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, grid_resolution=-10)
+
+        with pytest.raises(
+            AssertionError, match="'grid_resolution' must be strictly greater than 1."
+        ):
+            pdp.importance(X=X)
+
+    def test_percentiles_close(self, data_generator):
+        """Test the importance method with percentiles that are too close"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, percentiles=[0.1, 0.100001])
+
+        with pytest.raises(
+            ValueError, match="percentiles are too close to each other,"
+        ):
+            pdp.importance(X=X)
+
+    def test_wrong_custom_value(self, data_generator):
+        """Test custom values that are not one-dimensional"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(estimator=fitted_model, custom_values={1: [[1, 2, 3], [4, 5, 6]]})
+
+        with pytest.raises(
+            ValueError,
+            match="The custom grid for some features is not a one-dimensional array.",
+        ):
+            pdp.importance(X=X)
+
+    def test_wrong_type(self, data_generator):
+        """Test object data that cannot be sorted"""
+        X = np.array(["A", "B", "C", np.nan], dtype=object).reshape(-1, 1)
+        y = np.array([0, 1, 0, 1])
+
+        from sklearn.preprocessing import OrdinalEncoder
+        from sklearn.pipeline import make_pipeline
+
+        fitted_model = make_pipeline(
+            OrdinalEncoder(encoded_missing_value=-1),
+            LogisticRegression(),
+        ).fit(X, y)
+
+        pdp = PDP(
+            estimator=fitted_model,
+        )
+
+        with pytest.raises(
+            ValueError, match="Finding unique categories fail due to sorting."
+        ):
+            pdp.importance(X=X)
+
+    def test_reject_integer_data(self, data_generator):
+        """Integer features should be declared as categorical"""
+        X = np.arange(8).reshape(4, 2)
+        y = np.array([0, 1, 0, 1])
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(
+            estimator=fitted_model,
+        )
+        with pytest.raises(
+            ValueError,
+            match="Partial dependence plots are not supported for integer data: this ",
+        ):
+            pdp.importance(X=X)
+
+    @pytest.mark.skipif(not seaborn_installed(), reason="seaborn is not installed")
+    def test_unfitted_importance(self, data_generator):
+        """Test calling plot before computing the importance"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        pdp = PDP(
+            estimator=fitted_model,
+        )
+
+        with pytest.raises(
+            ValueError, match="The importances need to be called before."
+        ):
+            pdp.plot(feature_id=0, X=X)
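+
+
+# ----------------------------------------------------------------------------
+# Usage sketch (illustrative comments only, not collected by pytest): the
+# lines below restate the public PDP workflow exercised by the tests above,
+# assuming `X` and `y` are plain arrays and nothing about the internals beyond
+# what the tests themselves check.
+#
+#     model = LinearRegression().fit(X, y)  # PDP needs an already-fitted model
+#     pdp = PDP(estimator=model)            # optionally: categorical_features=...
+#     importances = pdp.fit_importance(X)   # one marginal score per feature
+#     ax = pdp.plot(feature_id=0, X=X)      # valid only after the importances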