scikit-learn-contrib · mathurinm · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024
diff --git a/doc/api.rst b/doc/api.rst
@@ -68,6 +68,7 @@ Datafits
    Poisson
    Quadratic
    QuadraticGroup
+   QuadraticHessian
    QuadraticSVC
    WeightedQuadratic
 
@@ -102,4 +103,4 @@ Experimental
    PDCD_WS
    Pinball
    SqrtQuadratic
-   SqrtLasso
+   SqrtLasso
diff --git a/skglm/datafits/__init__.py b/skglm/datafits/__init__.py
@@ -1,6 +1,6 @@
 from .base import BaseDatafit, BaseMultitaskDatafit
 from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma,
-                          Cox, WeightedQuadratic,)
+                          Cox, WeightedQuadratic, QuadraticHessian,)
 from .multi_task import QuadraticMultiTask
 from .group import QuadraticGroup, LogisticGroup
 
@@ -9,5 +9,6 @@
     BaseDatafit, BaseMultitaskDatafit,
     Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox,
     QuadraticMultiTask,
-    QuadraticGroup, LogisticGroup, WeightedQuadratic
+    QuadraticGroup, LogisticGroup, WeightedQuadratic,
+    QuadraticHessian
 ]
diff --git a/skglm/datafits/single_task.py b/skglm/datafits/single_task.py
@@ -239,6 +239,41 @@ def intercept_update_step(self, y, Xw):
         return np.sum(self.sample_weights * (Xw - y)) / self.sample_weights.sum()
 
 
+class QuadraticHessian(BaseDatafit):
+    r"""Quadratic datafit where we pass the Hessian A directly.
+
+    The datafit reads:
+
+    .. math:: 1 / 2 x^(\top) A x + \langle b, x \rangle
+
+    A is assumed symmetric. Up to a constant, this is the same as Quadratic, with
+    :math:`A = 1 / (n_"samples") X^(\top)X` and :math:`b = - 1 / n_"samples" X^(\top)y`.
+    When the Hessian is available, this datafit is more efficient than using Quadratic.
+    """
+
+    def __init__(self):
+        pass  # stateless: every method receives the data it needs as arguments
+
+    def get_spec(self):
+        pass  # returns None; __init__ sets no attributes
+
+    def params_to_dict(self):
+        return dict()  # empty: __init__ takes no parameters
+
+    def get_lipschitz(self, A, b):  # b is not used
+        n_features = A.shape[0]
+        lipschitz = np.zeros(n_features, dtype=A.dtype)
+        for j in range(n_features):
+            lipschitz[j] = A[j, j]  # j-th diagonal entry of A
+        return lipschitz
+
+    def gradient_scalar(self, A, b, w, Ax, j):  # A and w are not used
+        return Ax[j] + b[j]  # (A w + b)[j] if Ax holds A @ w (TODO confirm)
+
+    def value(self, b, x, Ax):
+        return 0.5 * (x*Ax).sum() + (b*x).sum()  # 0.5 * <x, Ax> + <b, x>
+
+
 @njit
 def sigmoid(x):
     """Vectorwise sigmoid."""

diff --git a/skglm/estimators.py b/skglm/estimators.py
@@ -21,7 +21,7 @@
 from skglm.utils.jit_compilation import compiled_clone
 from skglm.solvers import AndersonCD, MultiTaskBCD, GroupBCD
 from skglm.datafits import (Cox, Quadratic, Logistic, QuadraticSVC,
-                            QuadraticMultiTask, QuadraticGroup)
+                            QuadraticMultiTask, QuadraticGroup,)
 from skglm.penalties import (L1, WeightedL1, L1_plus_L2, L2, WeightedGroupL2,
                              MCPenalty, WeightedMCPenalty, IndicatorBox, L2_1)
 from skglm.utils.data import grp_converter

diff --git a/skglm/tests/test_datafits.py b/skglm/tests/test_datafits.py
@@ -6,7 +6,7 @@
 from numpy.testing import assert_allclose, assert_array_less
 
 from skglm.datafits import (Huber, Logistic, Poisson, Gamma, Cox, WeightedQuadratic,
-                            Quadratic,)
+                            Quadratic, QuadraticHessian)
 from skglm.penalties import L1, WeightedL1
 from skglm.solvers import AndersonCD, ProxNewton
 from skglm import GeneralizedLinearEstimator
@@ -219,5 +219,24 @@ def test_sample_weights(fit_intercept):
     # np.testing.assert_equal(n_iter, n_iter_overs)
 
 
+def test_HessianQuadratic():  # NOTE(review): the class is named QuadraticHessian
+    n_samples = 20
+    n_features = 10
+    X, y, _ = make_correlated_data(
+        n_samples=n_samples, n_features=n_features, random_state=0)
+    A = X.T @ X / n_samples  # Hessian of the equivalent Quadratic problem
+    b = -X.T @ y / n_samples  # its linear term
+    alpha = np.max(np.abs(b)) / 10  # a tenth of the largest |b_j|
+
+    pen = L1(alpha)
+    solv = AndersonCD(warm_start=False, verbose=2, fit_intercept=False)
+    lasso = GeneralizedLinearEstimator(Quadratic(), pen, solv).fit(X, y)
+    qpl1 = GeneralizedLinearEstimator(QuadraticHessian(), pen, solv).fit(A, b)
+
+    np.testing.assert_allclose(lasso.coef_, qpl1.coef_)  # both fits must agree
+    # make sure the match is not trivial (alpha too high, hence all-zero coef)
+    np.testing.assert_array_less(0.1, np.max(np.abs(qpl1.coef_)))
+
+
 if __name__ == '__main__':
     pass