From 2ace3ae8dc96a630148b72ca43e616089bcd45ca Mon Sep 17 00:00:00 2001
From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com>
Date: Fri, 17 Nov 2023 17:31:20 -0600
Subject: [PATCH 1/3] Making the trace component of SparseKernelNormalizer more
 memory-friendly

---
 src/skmatter/preprocessing/_data.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py
index 07160dea4..b2a89f3aa 100644
--- a/src/skmatter/preprocessing/_data.py
+++ b/src/skmatter/preprocessing/_data.py
@@ -524,7 +524,11 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None):
         if self.with_trace:
             Knm_centered = Knm - self.K_fit_rows_
 
-            Khat = Knm_centered @ np.linalg.pinv(Kmm, self.rcond) @ Knm_centered.T
+            # The following is more correctly written as Knm @ Kmm^{-1} @ Knm.T
+            # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory
+            # overload often caused by storing n x n matrices. This is fine
+            # for the following trace, but should not be used for other operations.
+            Khat = Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond)
 
             self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0])
         else:

From c0c7c65197ece080a75ec57d6ef15866b684ff79 Mon Sep 17 00:00:00 2001
From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com>
Date: Mon, 20 Nov 2023 09:26:46 -0600
Subject: [PATCH 2/3] Store the trace as Khat_trace instead of the Khat matrix

Co-authored-by: Alexander Goscinski <alex.goscinski@posteo.de>
---
 src/skmatter/preprocessing/_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py
index b2a89f3aa..4fbb2d7b9 100644
--- a/src/skmatter/preprocessing/_data.py
+++ b/src/skmatter/preprocessing/_data.py
@@ -528,7 +528,7 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None):
             # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory
             # overload often caused by storing n x n matrices. This is fine
             # for the following trace, but should not be used for other operations.
-            Khat = Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond)
+            Khat_trace = np.trace(Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond))
 
             self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0])
         else:

From ca19af2b62d0f5f8562fcac1b9804474e25ae938 Mon Sep 17 00:00:00 2001
From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com>
Date: Mon, 20 Nov 2023 09:28:07 -0600
Subject: [PATCH 3/3] Linting; use Khat_trace when computing scale_

---
 src/skmatter/preprocessing/_data.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py
index 4fbb2d7b9..a805e7460 100644
--- a/src/skmatter/preprocessing/_data.py
+++ b/src/skmatter/preprocessing/_data.py
@@ -528,9 +528,11 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None):
             # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory
             # overload often caused by storing n x n matrices. This is fine
             # for the following trace, but should not be used for other operations.
-            Khat_trace = np.trace(Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond))
+            Khat_trace = np.trace(
+                Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond)
+            )
 
-            self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0])
+            self.scale_ = np.sqrt(Khat_trace / Knm.shape[0])
         else:
             self.scale_ = 1.0