Skip to content

Commit b39bd10

Browse files
committed
Merge branch 'develop' of github.com:sccn/EEG-Dash-Data into develop
2 parents 1fe526f + 2664a6c commit b39bd10

File tree

4 files changed

+369
-132
lines changed

4 files changed

+369
-132
lines changed

eegdash/data_utils.py

+87-1
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,94 @@
66
from pathlib import Path
77
import re
88
import json
9+
from mne.io import BaseRaw
10+
from mne._fiff.utils import _find_channels, _read_segments_file
11+
import s3fs
12+
import tempfile
13+
from mne._fiff.utils import _read_segments_file
914

10-
verbose = False
15+
class RawEEGDash(BaseRaw):
    r"""Raw object from EEG-Dash connection with Openneuro S3 file.

    The file is downloaded from S3 into ``cache_dir`` the first time data
    are needed (eagerly when ``preload`` is truthy, lazily on the first
    read otherwise) and subsequently read like a local binary file.

    Parameters
    ----------
    input_fname : path-like
        S3 URI of the data file (e.g. ``s3://bucket/key``).
    metadata : dict
        Recording description; must contain the keys ``'sfreq'``
        (sampling frequency in Hz), ``'nchans'`` (number of channels)
        and ``'n_times'`` (number of samples).
    eog : list | tuple | 'auto'
        Names or indices of channels that should be designated EOG channels.
        If 'auto', the channel names containing ``EOG`` or ``EYE`` are used.
        Defaults to empty tuple.  NOTE(review): currently accepted but not
        used by this constructor — confirm intended wiring.
    preload : bool | str
        Forwarded to :class:`mne.io.BaseRaw`; when True the file is first
        downloaded into the cache and the local path is preloaded.
    cache_dir : str
        Directory where the S3 object is cached locally (default: cwd).
    uint16_codec : str | None
        Accepted for API symmetry with the EEGLAB reader; unused here.
    montage_units : str
        Accepted for API symmetry; unused here.
    verbose : bool | str | int | None
        Control verbosity of the logging output.

    See Also
    --------
    mne.io.Raw : Documentation of attributes and methods.

    Notes
    -----
    .. versionadded:: 0.11.0
    """

    def __init__(
        self,
        input_fname,
        metadata,
        eog=(),
        preload=False,
        *,
        cache_dir='.',
        uint16_codec=None,
        montage_units="auto",
        verbose=None,
    ):
        '''
        Get to work with S3 endpoint first, no caching
        '''
        # Build an mne.Info from the caller-supplied metadata.  Channel
        # names are synthesized (EEG1..EEGn) because the real names are not
        # known until the file itself is parsed.
        sfreq = metadata['sfreq']  # Sampling frequency (Hz)
        n_chans = metadata['nchans']
        n_times = metadata['n_times']
        ch_names = [f'EEG{d}' for d in range(1, n_chans + 1)]
        ch_types = ["eeg"] * n_chans
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)

        self.s3file = input_fname
        # Local cache path: same basename as the S3 key, under cache_dir.
        self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))

        if preload and not os.path.exists(self.filecache):
            # Eager download: fetch into the cache, then hand BaseRaw the
            # local path so it preloads from disk.  When the file is already
            # cached, preload stays True and data are loaded through the
            # _read_segment override below.
            self._download_s3()
            preload = self.filecache

        super().__init__(
            info,
            preload,
            last_samps=[n_times - 1],
            orig_format="double",
            verbose=verbose,
        )

    def _download_s3(self):
        """Download the S3 object into the local cache (anonymous access)."""
        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
        filesystem.download(self.s3file, self.filecache)
        # Point MNE's reader at the cached copy.
        self.filenames = [self.filecache]

    def _read_segment(
        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
    ):
        """Read a range of samples, downloading the file on first access."""
        if not os.path.exists(self.filecache):
            # Not preloaded and not cached yet: fetch from S3 first
            # (this also sets self.filenames to the cached path).
            self._download_s3()
        else:
            # Cached copy already on disk: just point the reader at it.
            self.filenames = [self.filecache]
        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)

    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
        """Read a chunk of data from the cached file as little-endian float32."""
        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
1197

1298

1399
class BIDSDataset():

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ pynwb
77
h5py
88
pymongo
99
joblib
10-
-e git+https://github.com/dungscout96/SignalStore.git@cedf682bf589e57c8ba8253a8ff2d7c33eeae97f#egg=signalstore
10+
-e git+https://github.com/dungscout96/SignalStore.git@cedf682bf589e57c8ba8253a8ff2d7c33eeae97f#egg=signalstore
11+
pynwb

tests/test_s3_mne.ipynb

+197
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"create empty mne-python raw object"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [
15+
{
16+
"name": "stdout",
17+
"output_type": "stream",
18+
"text": [
19+
"Creating RawArray with float64 data, n_channels=74, n_times=747750\n",
20+
" Range : 0 ... 747749 = 0.000 ... 2990.996 secs\n",
21+
"Ready.\n",
22+
"False\n",
23+
"<class 'mne.io.array.array.RawArray'>\n"
24+
]
25+
}
26+
],
27+
"source": [
28+
"import numpy as np\n",
29+
"import mne\n",
30+
"\n",
31+
"# Create a simple RawArray\n",
32+
"sfreq = 250 # Sampling frequency\n",
33+
"ch_names = [f'EEG{d}' for d in range(1,75)]\n",
34+
"ch_types = [\"eeg\"] * 74\n",
35+
"info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)\n",
36+
"\n",
37+
"data = np.random.randn(74, 747750) # 74 channels, 747750 samples\n",
38+
"raw = mne.io.RawArray(data, info)\n",
39+
"\n",
40+
"print(isinstance(raw, mne.io.Raw)) # True\n",
41+
"print(type(raw)) # <class 'mne.io.array.array.RawArray'>"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"metadata": {},
47+
"source": [
48+
"braindecode call __getitem__ of mne.base.Raw, which then calls _getitem which calls _read_segment of BaseRaw. mne uses _read_segment to read a specific range of the file. We want to test whether S3 file via fsspec can be integrated\n",
49+
"It calls _read_segments_file of the BaseRaw class. Any subclass must implement this method. EEGLAB calls fiff reader function: mne/_fiff/utils.py#L200"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 1,
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"# To be able to make edits to repo without having to restart notebook\n",
59+
"%load_ext autoreload\n",
60+
"%autoreload 2\n",
61+
"import sys \n",
62+
"sys.path.append('..')\n",
63+
"from eegdash.data_utils import RawEEGDash"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": 20,
69+
"metadata": {},
70+
"outputs": [
71+
{
72+
"name": "stdout",
73+
"output_type": "stream",
74+
"text": [
75+
"n_times 747750\n",
76+
"Reading 0 ... 747749 = 0.000 ... 2990.996 secs...\n"
77+
]
78+
}
79+
],
80+
"source": [
81+
"eegdash = RawEEGDash('s3://testspeedeegdash/sub-002_task-FaceRecognition_eeg.set', {'sfreq': 250, 'nchans': 74, 'n_times': 747750}, preload=True)"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": 23,
87+
"metadata": {},
88+
"outputs": [
89+
{
90+
"data": {
91+
"text/plain": [
92+
"array([[5.56413960e+07, 4.40817108e-39, 5.60519386e-45, ...,\n",
93+
" 7.10578003e+01, 7.32979889e+01, 6.95856934e+01],\n",
94+
" [5.97010569e-07, 8.72444220e-39, 0.00000000e+00, ...,\n",
95+
" 4.59728317e+01, 4.81444893e+01, 4.25833282e+01],\n",
96+
" [1.67969504e+08, 8.90820568e-39, 7.00649232e-45, ...,\n",
97+
" 4.93649330e+01, 4.92341499e+01, 4.53561974e+01],\n",
98+
" ...,\n",
99+
" [1.00893489e-43, 7.84727140e-44, 8.90820568e-39, ...,\n",
100+
" 4.53600616e+01, 4.10236855e+01, 4.29333000e+01],\n",
101+
" [1.07449142e-38, 8.40779079e-45, 1.02856414e-38, ...,\n",
102+
" 5.49960251e+01, 4.60316620e+01, 4.72489014e+01],\n",
103+
" [4.13273465e-39, 1.12103877e-44, 1.01938998e-38, ...,\n",
104+
" 4.54888268e+01, 3.74752045e+01, 2.96322441e+01]],\n",
105+
" shape=(74, 747750))"
106+
]
107+
},
108+
"execution_count": 23,
109+
"metadata": {},
110+
"output_type": "execute_result"
111+
}
112+
],
113+
"source": [
114+
"eegdash.get_data()"
115+
]
116+
},
117+
{
118+
"cell_type": "markdown",
119+
"metadata": {},
120+
"source": [
121+
"integrate with braindecode"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 33,
127+
"metadata": {},
128+
"outputs": [],
129+
"source": [
130+
"from braindecode.datasets import BaseDataset, BaseConcatDataset\n",
131+
"eegdash_braindecode = BaseConcatDataset([BaseDataset(eegdash)])"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 34,
137+
"metadata": {},
138+
"outputs": [],
139+
"source": [
140+
"from braindecode.preprocessing import (\n",
141+
" preprocess, Preprocessor, create_fixed_length_windows)\n",
142+
"windows_ds = create_fixed_length_windows(eegdash_braindecode, start_offset_samples=0, stop_offset_samples=None,\n",
143+
" window_size_samples=1000,\n",
144+
" window_stride_samples=1000, drop_last_window=True,\n",
145+
" preload=False)"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": 36,
151+
"metadata": {},
152+
"outputs": [
153+
{
154+
"data": {
155+
"text/plain": [
156+
"(74, 1000)"
157+
]
158+
},
159+
"execution_count": 36,
160+
"metadata": {},
161+
"output_type": "execute_result"
162+
}
163+
],
164+
"source": [
165+
"windows_ds[0][0].shape"
166+
]
167+
},
168+
{
169+
"cell_type": "code",
170+
"execution_count": null,
171+
"metadata": {},
172+
"outputs": [],
173+
"source": []
174+
}
175+
],
176+
"metadata": {
177+
"kernelspec": {
178+
"display_name": ".venv",
179+
"language": "python",
180+
"name": "python3"
181+
},
182+
"language_info": {
183+
"codemirror_mode": {
184+
"name": "ipython",
185+
"version": 3
186+
},
187+
"file_extension": ".py",
188+
"mimetype": "text/x-python",
189+
"name": "python",
190+
"nbconvert_exporter": "python",
191+
"pygments_lexer": "ipython3",
192+
"version": "3.10.12"
193+
}
194+
},
195+
"nbformat": 4,
196+
"nbformat_minor": 2
197+
}

0 commit comments

Comments
 (0)