# -*- coding: utf-8 -*-
"""
Created on Wed Jul 20 16:34:24 2022
@author: fm02
"""
import os
import numpy as np
import pandas as pd
import re
import pickle
from pygazeanalyser.edfreader import read_edf
DISPSIZE = (1920, 1080)  # screen resolution in pixels (width, height)
def get_blinks(data_edf):
    """Collect all blink events across the trials of one subject."""
    blinks = []
    for trial in data_edf:
        blinks.append(trial['events']['Eblk'])  # all blinks in this trial
    blinks = [x for x in blinks if x != []]  # drop trials without blinks
    blinks = [item for sublist in blinks for item in sublist]  # flatten
    return blinks
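
# NOTE: in pygazeanalyser's EDF reader each 'Eblk' entry is, as far as we can
# tell, a [starttime, endtime, duration] list in tracker time, so the
# flattened output of get_blinks() could be wrapped directly in a DataFrame
# (a sketch, not used in the pipeline below):
#   blinks = pd.DataFrame(get_blinks(data[sbj]),
#                         columns=['start', 'end', 'duration'])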
def read_edf_plain(filename):
    """Return a dataframe containing all (and only) the events from the EDF
    file, with their tracker time, without dividing them into trials."""
    # check that the file exists
    if os.path.isfile(filename):
        # open file
        f = open(filename, 'r')
    # raise an exception if the file does not exist
    else:
        raise Exception("Error in read_edf: file '%s' does not exist" % filename)
    raw = f.readlines()
    f.close()
    # variables
    event = []
    timepoint = []
    # loop through all lines
    for line in raw:
        # fixation start
        if line[0:4] == "SFIX":
            l = line[9:]
            timepoint.append(int(l))
            event.append(line[0:4])
        # fixation end
        elif line[0:4] == "EFIX":
            l = line[9:]
            l = l.split('\t')
            timepoint.append(int(l[1]))
            event.append(line[0:4])
        # saccade start
        elif line[0:5] == 'SSACC':
            l = line[9:]
            timepoint.append(int(l))
            event.append(line[0:5])
        # saccade end
        elif line[0:5] == "ESACC":
            l = line[9:]
            l = l.split('\t')
            timepoint.append(int(l[1]))
            event.append(line[0:5])
        # blink start
        elif line[0:6] == "SBLINK":
            l = line[9:]
            timepoint.append(int(l))
            event.append(line[0:6])
        # blink end
        elif line[0:6] == "EBLINK":
            l = line[9:]
            l = l.split('\t')
            timepoint.append(int(l[1]))
            event.append(line[0:6])
    # return one row per event
    data = pd.DataFrame()
    data['time'] = np.array(timepoint)
    data['event'] = np.array(event)
    return data
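
# For reference, read_edf_plain() assumes EyeLink ASC event lines of roughly
# the following shape (illustrative values; fields are tab-separated in the
# real file). The parser skips a 9-character prefix such as "SFIX L   "; for
# S* events the remainder is the start time, while for E* events the second
# tab-separated field (the event's end time) is what gets recorded:
#   SFIX L   9471162
#   EFIX L   9471162   9471354   193   654.2   382.7   1086.0
#   SBLINK L 9472100
#   EBLINK L 9472100   9472188   88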
def fixAOI(data_edf, data_plain):
    """Get all fixations within the AOI.

    Keeps, per trial, the first fixation within the AOI, provided it is a
    first-pass fixation (all earlier fixations land to its left, i.e. the
    AOI was not reached by a regression) and it is not immediately preceded
    or followed by a blink; trials with detection errors are excluded."""
    # get all fixation durations within a certain AOI for all trials of one
    # subject; data_trial collects the fixation measures that respect the
    # inclusion criteria
    data_trial = pd.DataFrame(columns=['IDStim',
                                       'FFD',
                                       'time',
                                       'fixated'])
    for i, trial in enumerate(data_edf):
        ID_stim = trial['events']['msg'][2][1][-4:-1]
        stimonset = trial['events']['msg'][0][0]
        pd_fix = pd.DataFrame.from_records(trial['events']['Efix'],
                                           columns=['start',
                                                    'end',
                                                    'duration',
                                                    'x',
                                                    'y'])
        # exclude trials where all fixations fall outside the screen;
        # this used to happen when there was an error in gaze-position
        # detection; it should not be a problem now, given that gaze is
        # required to trigger the start of the sentence
        if ((pd_fix['x'] < 0).all()) or ((pd_fix['x'] > DISPSIZE[0]).all()):
            pass
        elif ((pd_fix['y'] < 0).all()) or ((pd_fix['y'] > DISPSIZE[1]).all()):
            pass
        # or trials where fewer than two fixations were detected
        elif len(pd_fix) < 2:
            pass
        else:
            # consider only fixations following the first leftmost fixation
            # !! this is now useless and potentially problematic, given that
            # participants fixate on the left side of the screen (and not the
            # centre) before the appearance of the sentence
            # while pd_fix['x'][0] > pd_fix['x'][1]:
            #     pd_fix.drop([0], inplace=True)
            #     pd_fix.reset_index(drop=True, inplace=True)
            # the following info is gathered from the messages sent by the
            # stimulus presentation software
            # tuple indicating the dimensions of each sentence in pixels
            size = re.search("SIZE OF THE STIMULUS: (.*)\n",
                             trial['events']['msg'][4][1])
            size = eval(size.group(1))  # tuple (width, height)
            # size of each letter in pixels; this should be identical for
            # each sentence (equal to 19 px in our study)
            unit = re.search("NUMBER OF CHARACTERS: (.*)\n",
                             trial['events']['msg'][5][1])
            unit = size[0]/eval(unit.group(1))
            # position (in characters) of the target word inside the sentence
            pos_target = re.search("POS TARGET INSIDE BOX: (.*)\n",
                                   trial['events']['msg'][6][1])
            pos_target = eval(pos_target.group(1))
            # position (in pixels) of the target word:
            # convert the sentence width into the x coordinate where the
            # sentence starts; the starting position is the centre of the
            # x axis minus half the sentence width, because the sentence is
            # presented centred on the screen
            pos_startstim = DISPSIZE[0]/2 - size[0]/2
            # no need to calculate y, as the sentence is a single line always
            # presented at the vertical centre of the screen
            # get the x extent of the target word: pos_target is in
            # characters, so multiply each letter position by unit; the AOI
            # also includes half a space before and half a space after the
            # target word
            # tuple (x0, x1): position of the target word in pixels
            target_x = (pos_startstim + (pos_target[0]*unit) - unit/2,
                        pos_startstim + (pos_target[1]*unit) + unit/2)
            # the AOI in y is two letter-heights above and below the centre;
            # no need to be too strict as there's just one line
            target_y = (DISPSIZE[1]/2 - size[1]*2, DISPSIZE[1]/2 + size[1]*2)
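            # worked example with made-up numbers (not from the study): for a
            # 760-px-wide sentence of 40 characters (unit = 19 px) on a
            # 1920-px-wide screen, pos_startstim = 1920/2 - 760/2 = 580; a
            # target spanning characters 10 to 16 then gives
            # target_x = (580 + 10*19 - 9.5, 580 + 16*19 + 9.5) = (760.5, 893.5)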
            # boolean mask selecting all fixations on the target word,
            # i.e. target_start < fixation_position < target_end on both axes
            in_AOI = ((target_x[0] < pd_fix['x']) &
                      (pd_fix['x'] < target_x[1]) &
                      (target_y[0] < pd_fix['y']) &
                      (pd_fix['y'] < target_y[1]))
            fixAOI_x = pd_fix['x'][in_AOI]
            fixAOI_duration = pd_fix['duration'][in_AOI]
            # fixation start time relative to stimulus onset
            fixAOI_timein = pd_fix['start'][in_AOI] - stimonset
            # the 60 ms figure refers to the pilot; need to check for any
            # systematic delay
            # check that there is at least one fixation on the target
            if len(fixAOI_duration) > 0:
                # check that this is first pass, i.e. that all previous
                # fixations (identified by index) have a smaller x position
                if all(pd_fix['x'][0:fixAOI_x.index[0]] < fixAOI_x[fixAOI_x.index[0]]):
                    # tracker time at which the first fixation in the AOI starts
                    start = pd_fix['start'][in_AOI].iloc[0]
                    # get its position in the events-only data
                    plain_start = data_plain[data_plain['time'] == start].index[0]
                    r = range(plain_start-2, plain_start+4)
                    # this range because each blink generates an artefactual
                    # saccade, so each blink is surrounded by SSACC and ESACC
                    # events; the window is asymmetric so that it also covers
                    # the EFIX event; this basically checks whether the
                    # fixation is immediately preceded or followed by a blink
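                    # assumed event sequence around a blink in the
                    # events-only stream, illustrating why this window
                    # catches it:
                    #   ... EFIX  SSACC  SBLINK  EBLINK  ESACC  SFIX ...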
                    if not (any(data_plain['event'].iloc[r] == 'SBLINK')
                            or any(data_plain['event'].iloc[r] == 'EBLINK')):
                        data_trial.loc[len(data_trial)] = [ID_stim,
                                                           fixAOI_duration.iloc[0],
                                                           fixAOI_timein.iloc[0],
                                                           1]
                    else:
                        data_trial.loc[len(data_trial)] = [ID_stim,
                                                           'BLINK',
                                                           0,
                                                           0]
                else:
                    data_trial.loc[len(data_trial)] = [ID_stim,
                                                       'NOT FIRST PASS',
                                                       0,
                                                       0]
            else:
                data_trial.loc[len(data_trial)] = [ID_stim,
                                                   'SKIPPED',
                                                   0,
                                                   0]
    return data_trial
base_dir = "//cbsu/data/Imaging/hauk/users/fm02/MEG_NEOS/ET_data"
participant = [# '156',
               # '165',
               # '190',
               # '191',
               # '193',
               # '194',
               # '195',
               # '196',
               # '197',
               # '198',
               # '199',
               # '202',
               # '203',
               # '204',
               # '206',
               # '207',
               # '209',
               # '210',
               # '213',
               # '226',
               # '228',
               # '229',
               # '232',
               # '235',
               # '245',
               # '246',
               # '025',
               '031',
               '034'
               ]
data = {}
data_plain = {}
for i in participant:
    print(f'Reading EDF data participant {i}')
    # TRIGGER 94/95 are the messages marking trial start/stop in the ASC file
    data[i] = read_edf(f"{base_dir}/{i}/{i}.asc",
                       "TRIGGER 94", "TRIGGER 95")
    data_plain[i] = read_edf_plain(f"{base_dir}/{i}/{i}.asc")
# loop over participants
for sbj in data.keys():
    print(f'Extracting data participant {sbj}')
    data_pilot2 = fixAOI(data[sbj], data_plain[sbj])
    data_pilot2.to_csv(f'{base_dir}/{sbj}/ET_info_{sbj}.csv', index=False)
    with open(f'{base_dir}/{sbj}/data_{sbj}.P', 'wb') as outfile:
        pickle.dump(data[sbj], outfile)
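
# To reload a participant's pickled trials later (a sketch, assuming the same
# paths as above):
#   with open(f'{base_dir}/{sbj}/data_{sbj}.P', 'rb') as infile:
#       data_sbj = pickle.load(infile)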
##########################################################################
# # change event_156.asc
# import os
# import numpy as np
# import pandas as pd
# import re
# import pickle
# f = open('events_156.asc', 'r')
# raw = f.readlines()
# f.close()
# for i, row in enumerate(raw):
#     raw[i] = row.split()
# new = pd.DataFrame(raw)
# new = new.drop(new[new[2] == '92'].index).reset_index(drop=True)
# for row in new.index:
#     if new[2].loc[row] == '95':
#         if new[1].loc[row-1] == '1':
#             new[2].loc[row] = '1000'
#         elif new[1].loc[row-1] == '2':
#             new[2].loc[row] = '2000'
#         elif new[1].loc[row-1] == '3':
#             new[2].loc[row] = '3000'
#         elif new[1].loc[row-1] == '4':
#             new[2].loc[row] = '4000'
#         elif new[1].loc[row-1] == '5':
#             new[2].loc[row] = '5000'
# new.to_csv('newevents_156.asc', index=False, header=False, sep='\t')
# new[new[2] == '95']
# #
# #
# code for old presentations (i.e. probably id 156 and 165)
# i = 165
# fn = f"{base_dir}/{i}/{i}.asc"
# f = open(fn, 'r')
# raw = f.readlines()
# f.close()
# # get all indices that start with 'END' (when the tracker was
# # disconnected, e.g. at the end of a trial)
# ends = []
# for row in range(len(raw)):
#     line = raw[row].split()
#     if len(line) > 1:
#         if line[0] == 'END':
#             ends.append(row)
# # get indices of the start of new blocks
# blocks = []
# for block in [80, 160, 240, 320]:
#     blocks.append([i for i in range(len(raw)) if f'START TRIAL {block}' in raw[i]][0])
# # get indices (with respect to ends, NOT raw) of the end of the last trial
# # in each block
# separators = []
# for sep in range(len(blocks)):
#     separators.append(len([blocks[sep] - end for end in ends if blocks[sep] > end]) - 1)
# raw_divided = {}
# raw_divided[0] = raw[0:ends[separators[0]]]
# raw_divided[1] = raw[ends[separators[0]]:ends[separators[1]]]
# raw_divided[2] = raw[ends[separators[1]]:ends[separators[2]]]
# raw_divided[3] = raw[ends[separators[2]]:ends[separators[3]]]
# raw_divided[4] = raw[ends[separators[3]]:-1]
# for k in raw_divided.keys():
#     with open(f'165_block{k}.asc', 'w') as f:
#         for line in raw_divided[k]:
#             f.write("%s" % line)