CAMBI-tech · tab-cmd · May 12, 2025 · Mar 12, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/bcipy/core/stimuli.py b/bcipy/core/stimuli.py
@@ -307,9 +307,11 @@ def __call__(self,
         for symbol in symbol_set:
             data_by_targets_dict[symbol] = []
 
-        buffer = stimulus_duration / 5  # seconds, buffer for each inquiry
+        buffer = 0.5  # seconds, buffer for each inquiry
         # NOTE: This buffer is used to account for the screen downtime between each stimulus.
-        # There is a "duty cycle" of 80% for the stimuli, so we add a buffer of 20% of the stimulus length
+        # A better way of handling this buffer would be subtracting the flash time of the
+        # first symbol from that of the second symbol, which gives a more accurate
+        # representation of "stimulus duration".
         window_length = (stimulus_duration + buffer) * num_stimuli_per_inquiry   # in seconds
 
         reshaped_data = []

diff --git a/bcipy/signal/evaluate/fusion.py b/bcipy/signal/evaluate/fusion.py
@@ -260,7 +260,6 @@ def calculate_eeg_gaze_fusion_acc(
         # generate a tuple that matches the index of the symbol with the symbol itself:
         symbol_to_index = {symbol: i for i, symbol in enumerate(symbol_set)}
 
-        # train and save the gaze model as a pkl file:
         reshaped_data = centralized_gaze_data_train.reshape(
             (len(centralized_gaze_data_train), inquiry_length * predefined_dimensions))
         units = 1e4

diff --git a/bcipy/signal/model/README.md b/bcipy/signal/model/README.md
@@ -62,7 +62,7 @@ These models may be trained and evalulated, but are still being integrated into
 
 *Note*: The gaze model is currently under development and is not yet fully implemented.
 
-These models are used to update the posterior probability of stimuli viewed by a user based on gaze data. The gaze model uses a generative model to estimate the likelihood of the gaze data given the stimuli. There are several models implemented in this module, including a Gaussian Mixture Model (GMIndividual and GMCentralized) and Gaussian Process Model (GaussianProcess). When training data via offline analysis, if the data folder contains gaze data, the gaze model will be trained and saved to the output directory.
+These models are used to update the posterior probability of stimuli viewed by a user based on gaze data. The gaze model uses a generative model to estimate the likelihood of the gaze data given the stimuli. The models implemented in this module include a Gaussian Mixture Model (GMIndividual) and a Gaussian Process Model (GaussianProcess). When training on data via offline analysis, if the data folder contains gaze data, the gaze model will be trained and saved to the output directory.
 
 ## Fusion Analyis
 

diff --git a/bcipy/signal/model/__init__.py b/bcipy/signal/model/__init__.py
@@ -2,15 +2,14 @@
 from bcipy.signal.model.pca_rda_kde.pca_rda_kde import PcaRdaKdeModel
 from bcipy.signal.model.rda_kde.rda_kde import RdaKdeModel
 from bcipy.signal.model.gaussian_mixture.gaussian_mixture import (
-    GMIndividual, GMCentralized, GaussianProcess)
+    GMIndividual, GaussianProcess)
 
 
 __all__ = [
     "SignalModel",
     "PcaRdaKdeModel",
     "RdaKdeModel",
     'GMIndividual',
-    'GMCentralized',
     'GaussianProcess',
     "ModelEvaluationReport",
 ]
diff --git a/bcipy/signal/model/gaussian_mixture/__init__.py b/bcipy/signal/model/gaussian_mixture/__init__.py
@@ -1,8 +1,7 @@
-from .gaussian_mixture import GMIndividual, GMCentralized, GaussianProcess, GazeModelResolver
+from .gaussian_mixture import GMIndividual, GaussianProcess, GazeModelResolver
 
 __all__ = [
     'GMIndividual',
-    'GMCentralized',
     'GaussianProcess',
     'GazeModelResolver'
 ]
diff --git a/bcipy/signal/model/gaussian_mixture/gaussian_mixture.py b/bcipy/signal/model/gaussian_mixture/gaussian_mixture.py
@@ -1,7 +1,9 @@
+import pickle
 from pathlib import Path
 from typing import List
 from enum import Enum
 
+from bcipy.exceptions import SignalException
 from bcipy.core.stimuli import GazeReshaper
 from bcipy.signal.model import SignalModel
 
@@ -17,7 +19,6 @@
     """Enum for gaze model types"""
     GAUSSIAN_PROCESS = "GaussianProcess"
     GM_INDIVIDUAL = "GMIndividual"
-    GM_CENTRALIZED = "GMCentralized"
 
     def __str__(self):
         return self.value
@@ -31,8 +32,6 @@
             return GazeModelType.GAUSSIAN_PROCESS
         elif label == "GMIndividual":
             return GazeModelType.GM_INDIVIDUAL
-        elif label == "GMCentralized":
-            return GazeModelType.GM_CENTRALIZED
         else:
             raise ValueError(f"Model type {label} not recognized.")
 
@@ -51,8 +50,6 @@
             return GaussianProcess(*args, **kwargs)
         elif model_type == GazeModelType.GM_INDIVIDUAL:
             return GMIndividual(*args, **kwargs)
-        elif model_type == GazeModelType.GM_CENTRALIZED:
-            return GMCentralized(*args, **kwargs)
         else:
             raise ValueError(
                 f"Model type {model_type} not able to resolve. Not registered in GazeModelResolver.")
@@ -66,24 +63,74 @@
     def __init__(self, *args, **kwargs):
         self.ready_to_predict = False
         self.acc = None
+        self.time_average = None
+        self.centralized_data = None
+        self.model = None
 
-    def fit(self, training_data: np.ndarray):
-        ...
+    def fit(self, time_avg: np.ndarray, cent_data: np.ndarray):
+        """Fit the Gaussian Process model to the training data.
+        Args:
+            time_avg: Time average for each symbol; indexed by symbol when
+                evaluating likelihoods.
+            cent_data (np.ndarray): Centralized training data kept for likelihood evaluation.
+        """
+        self.time_average = time_avg
+        self.centralized_data = cent_data
+        self.ready_to_predict = True
+        return self
 
     def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray):
         ...
 
+    def evaluate_likelihood(self, data: np.ndarray, symbols: List[str],
+                            symbol_set: List[str]) -> np.ndarray:
+        if not self.ready_to_predict:
+            raise SignalException("must use model.fit() before model.evaluate_likelihood()")
+
+        gaze_log_likelihoods = np.zeros((len(symbol_set)))
+        # Clip the pre-saved centralized data to the length of our test data
+        cent_data = self.centralized_data[:, :, :data.shape[1]]
+        reshaped_data = cent_data.reshape((len(cent_data), data.shape[0] * data.shape[1]))
+        cov_matrix = np.cov(reshaped_data, rowvar=False)
+        reshaped_mean = np.mean(reshaped_data, axis=0)
+        eps = 10e-1  # NOTE: 10e-1 == 1.0; added to the diagonal to make the cov matrix invertible
+        inv_cov_matrix = np.linalg.inv(cov_matrix + np.eye(len(cov_matrix)) * eps)
+
+        for idx, sym in enumerate(symbol_set):
+            if self.time_average[sym] == []:
+                gaze_log_likelihoods[idx] = -100000  # set a very small value
+            else:
+                # Compute the likelihood of the data for each symbol
+                central_data = self.subtract_mean(data, self.time_average[sym])
+                # flatten this data
+                flattened_data = np.reshape(central_data, (-1, ))
+                diff = flattened_data - reshaped_mean
+                numerator = -np.dot(diff.T, np.dot(inv_cov_matrix, diff)) / 2
+                denominator = 0
+                unnormalized_log_likelihood_gaze = numerator - denominator
+                gaze_log_likelihoods[idx] = unnormalized_log_likelihood_gaze
+        # Find the gaze_likelihoods for the symbols in the inquiry
+        gaze_likelihood = np.exp(gaze_log_likelihoods)
+
+        return gaze_likelihood  # used in multimodal update
+
     def predict(self, test_data: np.ndarray, inquiry, symbol_set) -> np.ndarray:
         ...
 
     def predict_proba(self, test_data: np.ndarray) -> np.ndarray:
         ...
 
-    def save(self, path: Path):
-        ...
+    def save(self, path: Path) -> None:
+        """Save model weights (e.g. after training) to `path`"""
+        with open(path, "wb") as f:
+            pickle.dump(self.model, f)
 
-    def load(self, path: Path):
-        ...
+    def load(self, path: Path) -> SignalModel:
+        """Load pretrained model from `path`"""
+        with open(path, "rb") as f:
+            model = pickle.load(f)
+
+        return model
 
     def centralize(self, data: np.ndarray, symbol_pos: np.ndarray) -> np.ndarray:
         """ Using the symbol locations in matrix, centralize all data (in Tobii units).
@@ -207,9 +254,12 @@
 
         return likelihoods
 
-    def evaluate_likelihood(self, data: np.ndarray) -> np.ndarray:
-        data_length, _ = data.shape
+    def evaluate_likelihood(self, data: np.ndarray, symbols: List[str],
+                            symbol_set: List[str]) -> np.ndarray:
+        if not self.ready_to_predict:
+            raise SignalException("must use model.fit() before model.evaluate_likelihood()")
 
+        data_length, _ = data.shape
         likelihoods = np.zeros((data_length, self.num_components), dtype=object)
 
         # Find the likelihoods by insterting the test data into the pdf of each component
@@ -222,124 +272,14 @@
 
         return likelihoods
 
-    def save(self, path: Path):
-        """Save model state to the provided checkpoint"""
-        ...
-
-    def load(self, path: Path):
-        """Load model state from the provided checkpoint"""
-        ...
-
-
-class GMCentralized(SignalModel):
-    '''Gaze model that uses all symbols to fit a single Gaussian '''
-    reshaper = GazeReshaper()
-    name = "gaze_model_combined"
-
-    def __init__(self, num_components=4, random_state=0, *args, **kwargs):
-        self.num_components = num_components   # number of gaussians to fit
-        self.random_state = random_state
-        self.acc = None
-        self.means = None
-        self.covs = None
-
-        self.ready_to_predict = False
-
-    def fit(self, train_data: np.ndarray):
-        model = GaussianMixture(n_components=self.num_components, random_state=self.random_state, init_params='kmeans')
-        model.fit(train_data)
-        self.model = model
-
-        self.means = model.means_
-        self.covs = model.covariances_
-
-        self.ready_to_predict = True
-        return self
-
-    def evaluate(self, predictions, true_labels) -> np.ndarray:
-        '''
-        Compute performance characteristics on the provided test data and labels.
-
-        Parameters:
-        -----------
-        predictions: predicted labels for each test point per symbol
-        true_labels: true labels for each test point per symbol
-        Returns:
-        --------
-        accuracy_per_symbol: accuracy per symbol
-        '''
-        accuracy_per_symbol = np.sum(predictions == true_labels) / len(predictions) * 100
-        self.acc = accuracy_per_symbol
-        return accuracy_per_symbol
-
-    def predict(self, test_data: np.ndarray) -> np.ndarray:
-        '''
-        Compute log-likelihood of each sample.
-        Predict the labels for the test data.
-        '''
-        data_length, _ = test_data.shape
-        predictions = np.zeros(data_length, dtype=object)
-        likelihoods = self.model.predict_proba(test_data)
-
-        for i in range(data_length):
-            # Find the argmax of the likelihoods to get the predictions
-            predictions[i] = np.argmax(likelihoods[i])
-
-        return predictions
-
-    def predict_proba(self, test_data: np.ndarray) -> np.ndarray:
-        '''
-        Compute log-likelihood of each sample.
-        Predict the labels for the test data.
-
-        test_data:
-        '''
-        data_length, _ = test_data.shape
-
-        likelihoods = np.zeros((data_length, self.num_components), dtype=object)
-
-        # Find the likelihoods by insterting the test data into the pdf of each component
-        for i in range(data_length):
-            for k in range(self.num_components):
-                mu = self.means[k]
-                sigma = self.covs[k]
-
-                likelihoods[i, k] = stats.multivariate_normal.pdf(test_data[i], mu, sigma)
-
-        return likelihoods
-
-    def calculate_acc(self, predictions: int, counter: int):
-        '''
-        Compute model performance characteristics on the provided test data and labels.
-
-        predictions: predicted labels for each test point per symbol
-        counter: true labels for each test point per symbol
-        '''
-        accuracy_per_symbol = np.sum(predictions == counter) / len(predictions) * 100
+    def save(self, path: Path) -> None:
+        """Save model weights (e.g. after training) to `path`"""
+        with open(path, "wb") as f:
+            pickle.dump(self.model, f)
 
-        return accuracy_per_symbol
+    def load(self, path: Path) -> SignalModel:
+        """Load pretrained model from `path`"""
+        with open(path, "rb") as f:
+            model = pickle.load(f)
 
-    def save(self, path: Path):
-        """Save model state to the provided checkpoint"""
-        ...
-
-    def load(self, path: Path):
-        """Load model state from the provided checkpoint"""
-        ...
-
-    def centralize(self, data: np.ndarray, symbol_pos: np.ndarray) -> np.ndarray:
-        """ Using the symbol locations in matrix, centralize all data (in Tobii units).
-        This data will only be used in certain model types.
-        Args:
-            data (np.ndarray): Data in shape of num_samples x num_dimensions
-            symbol_pos (np.ndarray(float)): Array of the current symbol posiiton in Tobii units
-        Returns:
-            new_data (np.ndarray): Centralized data in shape of num_samples x num_dimensions
-        """
-        new_data = np.copy(data)
-        for i in range(len(data)):
-            # new_data[i] = data[i] - symbol_pos
-            new_data[:2, i] = data[:2, i] - symbol_pos
-            new_data[2:, i] = data[2:, i] - symbol_pos
-
-        return new_data
+        return model