Skip to content

Online Multimodal Fusion #384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: 2.0.0
Choose a base branch
from
6 changes: 4 additions & 2 deletions bcipy/core/stimuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,11 @@ def __call__(self,
for symbol in symbol_set:
data_by_targets_dict[symbol] = []

buffer = stimulus_duration / 5 # seconds, buffer for each inquiry
buffer = 0.5 # seconds, buffer for each inquiry
# NOTE: This buffer is used to account for the screen downtime between each stimulus.
# There is a "duty cycle" of 80% for the stimuli, so we add a buffer of 20% of the stimulus length
# A better way of handling this buffer would be subtracting the flash time of the
# second symbol from the first symbol, which gives a more accurate representation of
# "stimulus duration".
window_length = (stimulus_duration + buffer) * num_stimuli_per_inquiry # in seconds

reshaped_data = []
Expand Down
1 change: 0 additions & 1 deletion bcipy/signal/evaluate/fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ def calculate_eeg_gaze_fusion_acc(
# generate a tuple that matches the index of the symbol with the symbol itself:
symbol_to_index = {symbol: i for i, symbol in enumerate(symbol_set)}

# train and save the gaze model as a pkl file:
reshaped_data = centralized_gaze_data_train.reshape(
(len(centralized_gaze_data_train), inquiry_length * predefined_dimensions))
units = 1e4
Expand Down
2 changes: 1 addition & 1 deletion bcipy/signal/model/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ These models may be trained and evaluated, but are still being integrated into

*Note*: The gaze model is currently under development and is not yet fully implemented.

These models are used to update the posterior probability of stimuli viewed by a user based on gaze data. The gaze model uses a generative model to estimate the likelihood of the gaze data given the stimuli. There are several models implemented in this module, including a Gaussian Mixture Model (GMIndividual and GMCentralized) and Gaussian Process Model (GaussianProcess). When training data via offline analysis, if the data folder contains gaze data, the gaze model will be trained and saved to the output directory.
These models are used to update the posterior probability of stimuli viewed by a user based on gaze data. The gaze model uses a generative model to estimate the likelihood of the gaze data given the stimuli. There are several models implemented in this module, including a Gaussian Mixture Model (GMIndividual) and a Gaussian Process Model (GaussianProcess). When training data via offline analysis, if the data folder contains gaze data, the gaze model will be trained and saved to the output directory.

## Fusion Analysis

Expand Down
3 changes: 1 addition & 2 deletions bcipy/signal/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
from bcipy.signal.model.pca_rda_kde.pca_rda_kde import PcaRdaKdeModel
from bcipy.signal.model.rda_kde.rda_kde import RdaKdeModel
from bcipy.signal.model.gaussian_mixture.gaussian_mixture import (
GMIndividual, GMCentralized, GaussianProcess)
GMIndividual, GaussianProcess)


__all__ = [
"SignalModel",
"PcaRdaKdeModel",
"RdaKdeModel",
'GMIndividual',
'GMCentralized',
'GaussianProcess',
"ModelEvaluationReport",
]
3 changes: 1 addition & 2 deletions bcipy/signal/model/gaussian_mixture/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from .gaussian_mixture import GMIndividual, GMCentralized, GaussianProcess, GazeModelResolver
from .gaussian_mixture import GMIndividual, GaussianProcess, GazeModelResolver

__all__ = [
'GMIndividual',
'GMCentralized',
'GaussianProcess',
'GazeModelResolver'
]
204 changes: 72 additions & 132 deletions bcipy/signal/model/gaussian_mixture/gaussian_mixture.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import pickle

Check warning on line 1 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L1

Consider possible security implications associated with pickle module. (B403)
from pathlib import Path
from typing import List
from enum import Enum

from bcipy.exceptions import SignalException
from bcipy.core.stimuli import GazeReshaper
from bcipy.signal.model import SignalModel

Expand All @@ -17,7 +19,6 @@
"""Enum for gaze model types"""
GAUSSIAN_PROCESS = "GaussianProcess"
GM_INDIVIDUAL = "GMIndividual"
GM_CENTRALIZED = "GMCentralized"

def __str__(self):
return self.value
Expand All @@ -31,8 +32,6 @@
return GazeModelType.GAUSSIAN_PROCESS
elif label == "GMIndividual":
return GazeModelType.GM_INDIVIDUAL
elif label == "GMCentralized":
return GazeModelType.GM_CENTRALIZED
else:
raise ValueError(f"Model type {label} not recognized.")

Expand All @@ -51,8 +50,6 @@
return GaussianProcess(*args, **kwargs)
elif model_type == GazeModelType.GM_INDIVIDUAL:
return GMIndividual(*args, **kwargs)
elif model_type == GazeModelType.GM_CENTRALIZED:
return GMCentralized(*args, **kwargs)
else:
raise ValueError(
f"Model type {model_type} not able to resolve. Not registered in GazeModelResolver.")
Expand All @@ -66,24 +63,74 @@
def __init__(self, *args, **kwargs):
self.ready_to_predict = False
self.acc = None
self.time_average = None
self.centralized_data = None
self.model = None

def fit(self, training_data: np.ndarray):
...
def fit(self, time_avg: np.ndarray, cent_data: np.ndarray):
"""Fit the Gaussian Process model to the training data.
Args:
time_avg Dict[(np.ndarray)]: Time average for the symbols.
mean_data (np.ndarray): Sample average for the training data.
cov_data (np.ndarray): Covariance matrix for the training data.
"""
self.time_average = time_avg
self.centralized_data = cent_data
self.ready_to_predict = True
return self

def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray):
...

def evaluate_likelihood(self, data: np.ndarray, symbols: List[str],

Check warning on line 85 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L85

Number of parameters was 2 in 'SignalModel.evaluate_likelihood' and is now 4 in overriding 'GaussianProcess.evaluate_likelihood' method
symbol_set: List[str]) -> np.ndarray:
if not self.ready_to_predict:
raise SignalException("must use model.fit() before model.evaluate_likelihood()")

gaze_log_likelihoods = np.zeros((len(symbol_set)))
# Clip the pre-saved centralized data to the length of our test data
cent_data = self.centralized_data[:, :, :data.shape[1]]

Check notice on line 92 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L92

Value of type "Any | None" is not indexable. (index)
reshaped_data = cent_data.reshape((len(cent_data), data.shape[0] * data.shape[1]))
cov_matrix = np.cov(reshaped_data, rowvar=False)
reshaped_mean = np.mean(reshaped_data, axis=0)
eps = 10e-1 # add a small value to the diagonal to make the cov matrix invertible
inv_cov_matrix = np.linalg.inv(cov_matrix + np.eye(len(cov_matrix)) * eps)

for idx, sym in enumerate(symbol_set):
if self.time_average[sym] == []:

Check notice on line 100 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L100

Value of type "Any | None" is not indexable. (index)
gaze_log_likelihoods[idx] = -100000 # set a very small value
else:
# Compute the likelihood of the data for each symbol
central_data = self.subtract_mean(data, self.time_average[sym])

Check notice on line 104 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L104

Value of type "Any | None" is not indexable. (index)
# flatten this data
flattened_data = np.reshape(central_data, (-1, ))
diff = flattened_data - reshaped_mean
numerator = -np.dot(diff.T, np.dot(inv_cov_matrix, diff)) / 2
denominator = 0
unnormalized_log_likelihood_gaze = numerator - denominator
gaze_log_likelihoods[idx] = unnormalized_log_likelihood_gaze
# Find the gaze_likelihoods for the symbols in the inquiry
gaze_likelihood = np.exp(gaze_log_likelihoods)

return gaze_likelihood # used in multimodal update

def predict(self, test_data: np.ndarray, inquiry, symbol_set) -> np.ndarray:
...

def predict_proba(self, test_data: np.ndarray) -> np.ndarray:
...

def save(self, path: Path):
...
def save(self, path: Path) -> None:
"""Save model weights (e.g. after training) to `path`"""
with open(path, "wb") as f:
pickle.dump(self.model, f)

def load(self, path: Path):
...
def load(self, path: Path) -> SignalModel:
"""Load pretrained model from `path`"""
with open(path, "rb") as f:
model = pickle.load(f)

Check warning on line 131 in bcipy/signal/model/gaussian_mixture/gaussian_mixture.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

bcipy/signal/model/gaussian_mixture/gaussian_mixture.py#L131

Pickle and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue. (B301)

return model

def centralize(self, data: np.ndarray, symbol_pos: np.ndarray) -> np.ndarray:
""" Using the symbol locations in matrix, centralize all data (in Tobii units).
Expand Down Expand Up @@ -207,9 +254,12 @@

return likelihoods

def evaluate_likelihood(self, data: np.ndarray) -> np.ndarray:
data_length, _ = data.shape
def evaluate_likelihood(self, data: np.ndarray, symbols: List[str],
symbol_set: List[str]) -> np.ndarray:
if not self.ready_to_predict:
raise SignalException("must use model.fit() before model.evaluate_likelihood()")

data_length, _ = data.shape
likelihoods = np.zeros((data_length, self.num_components), dtype=object)

# Find the likelihoods by inserting the test data into the pdf of each component
Expand All @@ -222,124 +272,14 @@

return likelihoods

def save(self, path: Path):
"""Save model state to the provided checkpoint"""
...

def load(self, path: Path):
"""Load model state from the provided checkpoint"""
...


class GMCentralized(SignalModel):
'''Gaze model that uses all symbols to fit a single Gaussian '''
reshaper = GazeReshaper()
name = "gaze_model_combined"

def __init__(self, num_components=4, random_state=0, *args, **kwargs):
self.num_components = num_components # number of gaussians to fit
self.random_state = random_state
self.acc = None
self.means = None
self.covs = None

self.ready_to_predict = False

def fit(self, train_data: np.ndarray):
model = GaussianMixture(n_components=self.num_components, random_state=self.random_state, init_params='kmeans')
model.fit(train_data)
self.model = model

self.means = model.means_
self.covs = model.covariances_

self.ready_to_predict = True
return self

def evaluate(self, predictions, true_labels) -> np.ndarray:
'''
Compute performance characteristics on the provided test data and labels.

Parameters:
-----------
predictions: predicted labels for each test point per symbol
true_labels: true labels for each test point per symbol
Returns:
--------
accuracy_per_symbol: accuracy per symbol
'''
accuracy_per_symbol = np.sum(predictions == true_labels) / len(predictions) * 100
self.acc = accuracy_per_symbol
return accuracy_per_symbol

def predict(self, test_data: np.ndarray) -> np.ndarray:
'''
Compute log-likelihood of each sample.
Predict the labels for the test data.
'''
data_length, _ = test_data.shape
predictions = np.zeros(data_length, dtype=object)
likelihoods = self.model.predict_proba(test_data)

for i in range(data_length):
# Find the argmax of the likelihoods to get the predictions
predictions[i] = np.argmax(likelihoods[i])

return predictions

def predict_proba(self, test_data: np.ndarray) -> np.ndarray:
'''
Compute log-likelihood of each sample.
Predict the labels for the test data.

test_data:
'''
data_length, _ = test_data.shape

likelihoods = np.zeros((data_length, self.num_components), dtype=object)

# Find the likelihoods by inserting the test data into the pdf of each component
for i in range(data_length):
for k in range(self.num_components):
mu = self.means[k]
sigma = self.covs[k]

likelihoods[i, k] = stats.multivariate_normal.pdf(test_data[i], mu, sigma)

return likelihoods

def calculate_acc(self, predictions: int, counter: int):
'''
Compute model performance characteristics on the provided test data and labels.

predictions: predicted labels for each test point per symbol
counter: true labels for each test point per symbol
'''
accuracy_per_symbol = np.sum(predictions == counter) / len(predictions) * 100
def save(self, path: Path) -> None:
"""Save model weights (e.g. after training) to `path`"""
with open(path, "wb") as f:
pickle.dump(self.model, f)

return accuracy_per_symbol
def load(self, path: Path) -> SignalModel:
"""Load pretrained model from `path`"""
with open(path, "rb") as f:
model = pickle.load(f)

def save(self, path: Path):
"""Save model state to the provided checkpoint"""
...

def load(self, path: Path):
"""Load model state from the provided checkpoint"""
...

def centralize(self, data: np.ndarray, symbol_pos: np.ndarray) -> np.ndarray:
""" Using the symbol locations in matrix, centralize all data (in Tobii units).
This data will only be used in certain model types.
Args:
data (np.ndarray): Data in shape of num_samples x num_dimensions
symbol_pos (np.ndarray(float)): Array of the current symbol position in Tobii units
Returns:
new_data (np.ndarray): Centralized data in shape of num_samples x num_dimensions
"""
new_data = np.copy(data)
for i in range(len(data)):
# new_data[i] = data[i] - symbol_pos
new_data[:2, i] = data[:2, i] - symbol_pos
new_data[2:, i] = data[2:, i] - symbol_pos

return new_data
return model
Loading