Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions data/make_subject_specific_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import random
from sklearn.model_selection import train_test_split
import shutil
import numpy as np

log = logging.getLogger(__name__)

Expand All @@ -19,16 +20,18 @@ def main(cfg: DictConfig) -> None:
log.info(f'Working directory {os.getcwd()}')

data_path = cfg.data_prep.data_path


manifest_path = os.path.join(data_path, "manifest.tsv")
manifest_path = os.path.join(data_path, f"subject_manifests/{cfg.data_prep.subj}/manifest.tsv")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems fine, but any reason for this change?

assert os.path.exists(manifest_path)
manifest = []
with open(manifest_path) as fd:
rd = csv.reader(fd, delimiter="\t", quotechar='"')
for row in rd:
manifest.append(row)
if row[1] == cfg.data_prep.subj:
manifest.append(row)

label_path = os.path.join(data_path, "labels.tsv")
label_path = os.path.join(data_path, f"subject_labels/{cfg.data_prep.subj}/labels.tsv")
assert os.path.exists(label_path)
labels = []
with open(label_path) as fd:
Expand All @@ -38,22 +41,23 @@ def main(cfg: DictConfig) -> None:

out_path = cfg.data_prep.out_path
Path(out_path).mkdir(exist_ok=True, parents=True)
src = os.path.join(data_path, "localization")
src = os.path.join(data_path, f"subject_metadata/{cfg.data_prep.subj}/localization")
dest = os.path.join(out_path, "localization")
if not os.path.exists(dest):
shutil.copytree(src, dest)

src = os.path.join(data_path, "all_ordered_electrodes.json")
src = os.path.join(data_path, f"subject_metadata/{cfg.data_prep.subj}/all_ordered_electrodes.json")
dest = os.path.join(out_path, "all_ordered_electrodes.json")
if not os.path.exists(dest):
shutil.copy(src, dest)


new_manifest, new_labels = [], []
for manifest_record, labels_record in zip(manifest, labels):
if manifest_record[1] == cfg.data_prep.subj:
new_manifest.append(manifest_record)
new_labels.append(labels_record)

manifest_path = os.path.join(out_path, "manifest.tsv")
with open(manifest_path, 'w', newline='') as tsvfile:
writer = csv.writer(tsvfile, delimiter='\t', lineterminator='\n')
Expand All @@ -67,6 +71,4 @@ def main(cfg: DictConfig) -> None:
writer.writerow(record)

if __name__ == "__main__":
main()


main()
19 changes: 17 additions & 2 deletions data/multi_electrode_subj_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,21 @@ def get_subj_data(self):
electrodes = reader.selected_electrodes
localization_df = trial_data.get_brain_region_localization_df()

seeg_data = np.concatenate(seeg_data, axis=1)
return seeg_data, trials, electrodes, localization_df
# Determine total size
total_words = sum(arr.shape[1] for arr in seeg_data)
n_electrodes, _, n_samples = seeg_data[0].shape

neural_data_path = "neural_data_memmap.dat"
neural_data = np.memmap(neural_data_path, dtype=seeg_data[0].dtype,
mode='w+', shape=(n_electrodes, total_words, n_samples))

# Fill from seeg_data
current_idx = 0
for arr in seeg_data:
word_count = arr.shape[1]
neural_data[:, current_idx:current_idx + word_count, :] = arr
current_idx += word_count
del arr

return neural_data, trials, electrodes, localization_df

38 changes: 36 additions & 2 deletions data/speech_nonspeech_subject_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,24 @@ def get_subj_data(self, subject):

electrodes = reader.selected_electrodes
localization_df = trial_data.get_brain_region_localization_df()
neural_data = np.concatenate(seeg_data, axis=1)

# neural_data = np.concatenate(seeg_data, axis=1)

total_words = sum(arr.shape[1] for arr in seeg_data)
n_electrodes, _, n_samples = seeg_data[0].shape

neural_data_path = "neural_data_memmap.dat"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Since the memmap path `neural_data_memmap.dat` is fixed, could one run overwrite the file while another run is still using it? Maybe we can give each run a unique filename (e.g. via `tempfile` or a run ID)?

  2. We may also want to delete this file after the run finishes, so the memmaps don't accumulate on disk.

neural_data = np.memmap(neural_data_path, dtype=seeg_data[0].dtype,
mode='w+', shape=(n_electrodes, total_words, n_samples))

# Fill from seeg_data
current_idx = 0
for arr in seeg_data:
word_count = arr.shape[1]
neural_data[:, current_idx:current_idx + word_count, :] = arr
current_idx += word_count
del arr

labels_df = pd.concat(words) #NOTE the index will not be unique, but the location will
#TODO: pretty sure we are missing the get_subj_data method here
return labels_df, neural_data, trials, electrodes, localization_df
Expand Down Expand Up @@ -79,7 +96,24 @@ def get_subj_data(self, subject):

electrodes = reader.selected_electrodes
localization_df = trial_data.get_brain_region_localization_df()
neural_data = np.concatenate(seeg_data, axis=1)

# neural_data = np.concatenate(seeg_data, axis=1)

total_words = sum(arr.shape[1] for arr in seeg_data)
n_electrodes, _, n_samples = seeg_data[0].shape

neural_data_path = "neural_data_memmap.dat"
neural_data = np.memmap(neural_data_path, dtype=seeg_data[0].dtype,
mode='w+', shape=(n_electrodes, total_words, n_samples))

# Fill from seeg_data
current_idx = 0
for arr in seeg_data:
word_count = arr.shape[1]
neural_data[:, current_idx:current_idx + word_count, :] = arr
current_idx += word_count
del arr

labels_df = pd.concat(words) #NOTE the index will not be unique, but the location will
#TODO: pretty sure we are missing the get_subj_data method here
return labels_df, neural_data, trials, electrodes, localization_df
25 changes: 23 additions & 2 deletions data/subject_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,28 @@ def get_subj_data(self, subject, index_subsample=None):
#get electrode labels
electrodes = reader.selected_electrodes
localization_df = trial_data.get_brain_region_localization_df()
neural_data = np.concatenate(seeg_data, axis=1)
#neural_data is [n_electrodes, n_words, n_samples]

# neural_data = np.concatenate(seeg_data, axis=1)
# neural_data is [n_electrodes, n_words, n_samples]

# Determine total size
total_words = sum(arr.shape[1] for arr in seeg_data)
n_electrodes, _, n_samples = seeg_data[0].shape

neural_data_path = "neural_data_memmap.dat"
neural_data = np.memmap(neural_data_path, dtype=seeg_data[0].dtype,
mode='w+', shape=(n_electrodes, total_words, n_samples))

# Fill from seeg_data
current_idx = 0
for arr in seeg_data:
word_count = arr.shape[1]
neural_data[:, current_idx:current_idx + word_count, :] = arr
current_idx += word_count
del arr

# You can read from this later like a regular array:
# neural_data = np.memmap(neural_data_path, dtype=np.float32, mode='r', shape=(...))

words_df = pd.concat(words) #NOTE the index will not be unique, but the location will
return words_df, neural_data, trials, electrodes, localization_df
Loading