From 2ace3ae8dc96a630148b72ca43e616089bcd45ca Mon Sep 17 00:00:00 2001 From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Fri, 17 Nov 2023 17:31:20 -0600 Subject: [PATCH 1/3] Making the trace component of SparseKernelNormalizer more memory-friendly --- src/skmatter/preprocessing/_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index 07160dea4..b2a89f3aa 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -524,7 +524,11 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None): if self.with_trace: Knm_centered = Knm - self.K_fit_rows_ - Khat = Knm_centered @ np.linalg.pinv(Kmm, self.rcond) @ Knm_centered.T + # The following is more correctly written as Knm @ Kmm^{-1} @ Knm.T + # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory + # overload often caused by storing n x n matrices. This is fine + # for the following trace, but should not be used for other operations. + Khat = Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond) self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0]) else: From c0c7c65197ece080a75ec57d6ef15866b684ff79 Mon Sep 17 00:00:00 2001 From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Mon, 20 Nov 2023 09:26:46 -0600 Subject: [PATCH 2/3] Update src/skmatter/preprocessing/_data.py Co-authored-by: Alexander Goscinski --- src/skmatter/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index b2a89f3aa..4fbb2d7b9 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -528,7 +528,7 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None): # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory # overload often caused by storing n x n matrices. This is fine # for the following trace, but should not be used for other operations. - Khat = Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond) + Khat_trace = np.trace(Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond)) self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0]) else: From ca19af2b62d0f5f8562fcac1b9804474e25ae938 Mon Sep 17 00:00:00 2001 From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Mon, 20 Nov 2023 09:28:07 -0600 Subject: [PATCH 3/3] Linting --- src/skmatter/preprocessing/_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index 4fbb2d7b9..a805e7460 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -528,9 +528,11 @@ def fit(self, Knm, Kmm, y=None, sample_weight=None): # but has been changed to Knm.T @ Knm @ Kmm^{-1} to avoid the memory # overload often caused by storing n x n matrices. This is fine # for the following trace, but should not be used for other operations. - Khat_trace = np.trace(Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond)) + Khat_trace = np.trace( + Knm_centered.T @ Knm_centered @ np.linalg.pinv(Kmm, self.rcond) + ) - self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0]) + self.scale_ = Khat_trace / Knm.shape[0] else: self.scale_ = 1.0