Skip to content

Commit 6dd5a1a

Browse files
Add files via upload
1 parent 0742ff9 commit 6dd5a1a

File tree

1 file changed

+349
-0
lines changed

1 file changed

+349
-0
lines changed

ts_ae.py

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
# Neural net code from the tutorials found here
2+
# http://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html
3+
4+
# To use this code please install pytorch, and mne
5+
# http://pytorch.org
6+
# MNE can be installed alongside other useful tools by installing braindecode
7+
# https://robintibor.github.io/braindecode/index.html
8+
9+
import torch
10+
import torch.nn as nn
11+
import torch.nn.functional as F
12+
from torch import optim
13+
from torch.autograd import Variable
14+
15+
import mne
16+
import random
17+
import numpy as np
18+
from mne import concatenate_raws
19+
20+
import unicodedata
21+
import string
22+
import re
23+
24+
25+
# TOOLS
26+
import time
27+
import math
28+
def asMinutes(s):
29+
m = math.floor(s / 60)
30+
s -= m * 60
31+
return '%dm %ds' % (m, s)
32+
def timeSince(since, percent):
33+
now = time.time()
34+
s = now - since
35+
if percent is not 0:
36+
es = s / (percent)
37+
rs = es - s
38+
return '%s (- %s)' % (asMinutes(s), asMinutes(rs))
39+
else:
40+
return '%s (- ?)' % (asMinutes(s))
41+
42+
import matplotlib.pyplot as plt
43+
import matplotlib.ticker as ticker
44+
def showPlot(points, show_plot):
45+
plt.figure()
46+
fig, ax = plt.subplots()
47+
# this locator puts ticks at regular intervals
48+
loc = ticker.MultipleLocator(base=0.2)
49+
ax.yaxis.set_major_locator(loc)
50+
plt.plot(points)
51+
if show_plot:
52+
plt.show()
53+
54+
55+
56+
use_cuda = torch.cuda.is_available()
57+
use_cuda = False # Overriding cuda because my graphics card only has cuda compatibility 3.0
58+
59+
60+
class EncoderRNN(nn.Module):
61+
def __init__(self, input_size, hidden_size):
62+
super(EncoderRNN, self).__init__()
63+
self.hidden_size = hidden_size
64+
self.linear = nn.Linear(input_size, hidden_size)
65+
self.gru = nn.GRU(hidden_size, hidden_size)
66+
67+
def forward(self, input, hidden):
68+
linear1 = self.linear(input).view(1, 1, -1)
69+
output = linear1
70+
output, hidden = self.gru(output, hidden)
71+
return output, hidden
72+
73+
def initHidden(self):
74+
result = Variable(torch.zeros(1, 1, self.hidden_size))
75+
if use_cuda:
76+
return result.cuda()
77+
else:
78+
return result
79+
80+
class DecoderRNN(nn.Module):
81+
def __init__(self, hidden_size, output_size):
82+
super(DecoderRNN, self).__init__()
83+
self.hidden_size = hidden_size
84+
self.linear = nn.Linear(output_size, hidden_size)
85+
self.gru = nn.GRU(hidden_size, hidden_size)
86+
self.out = nn.Linear(hidden_size, output_size)
87+
88+
def forward(self, input, hidden):
89+
output = self.linear(input).view(1, 1, -1)
90+
output = F.relu(output)
91+
output, hidden = self.gru(output, hidden)
92+
output = self.out(output[0])
93+
return output, hidden
94+
95+
def initHidden(self):
96+
result = Variable(torch.zeros(1, 1, self.hidden_size))
97+
if use_cuda:
98+
return result.cuda()
99+
else:
100+
return result
101+
102+
#teacher_forcing_ratio = 0 # use for non sequence to sequence
103+
teacher_forcing_ratio = 0.5 # use for sequence to sequence
104+
105+
def train(input_variable, target_variable, encoder, decoder,
106+
encoder_optimizer, decoder_optimizer,
107+
criterion, max_length):
108+
encoder_hidden = encoder.initHidden()
109+
110+
if encoder_optimizer is not None:
111+
encoder_optimizer.zero_grad()
112+
if decoder_optimizer is not None:
113+
decoder_optimizer.zero_grad()
114+
115+
input_length = input_variable.size()[0]
116+
target_length = target_variable.size()[0]
117+
118+
#catalog of encoder outputs
119+
encoder_outputs = Variable(torch.zeros(input_length, encoder.hidden_size))
120+
encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
121+
122+
loss = 0
123+
124+
for ei in range(input_length):
125+
encoder_output, encoder_hidden = encoder(
126+
input_variable[ei], encoder_hidden)
127+
#We are not using encoder outputs, just final hidden state, but collect data anyway
128+
encoder_outputs[ei] = encoder_output[0][0]
129+
130+
decoder_input = Variable(torch.zeros(1,max_length))
131+
decoder_input = decoder_input.cuda() if use_cuda else decoder_input
132+
133+
# Decoders first hidden is the final hidden from the encoder
134+
decoder_hidden = encoder_hidden
135+
136+
use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
137+
138+
if use_teacher_forcing:
139+
# Teacher forcing: Feed the target as the next input
140+
for di in range(target_length):
141+
decoder_output, decoder_hidden = decoder(
142+
decoder_input, decoder_hidden)
143+
loss += criterion(decoder_output, target_variable[di])
144+
decoder_input = target_variable[di] # Teacher forcing
145+
146+
else:
147+
# Without teacher forcing: use its own predictions as the next imput
148+
for di in range(target_length):
149+
decoder_output, decoder_hidden = decoder(
150+
decoder_input, decoder_hidden)
151+
152+
decoder_input = decoder_output
153+
decoder_input = decoder_input.cuda() if use_cuda else decoder_input
154+
155+
loss += criterion(decoder_output, target_variable[di])
156+
157+
loss.backward()
158+
159+
if encoder_optimizer is not None:
160+
encoder_optimizer.step()
161+
if decoder_optimizer is not None:
162+
decoder_optimizer.step()
163+
164+
return (loss.data[0] / target_length)
165+
166+
167+
def trainIters(encoder, decoder, criterion, pairs, n_iters, print_every=1000, plot_every=100, show_plot=False, learning_rate=0.01, max_length = 100):
168+
start = time.time()
169+
plot_losses = []
170+
print_loss_total = 0 # Reset every print_every
171+
plot_loss_total = 0 # Reset every plot_every
172+
173+
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
174+
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
175+
# using random.choice allows us to train more than the
176+
# amount of input we have by randomly picking from range over and over
177+
training_pairs = [random.choice(pairs) for i in range(n_iters)]
178+
179+
for iter in range(1, n_iters + 1):
180+
training_pair = training_pairs[iter - 1]
181+
input_variable = training_pair[0]
182+
target_variable = training_pair[1]
183+
184+
loss = train(input_variable, target_variable, encoder,
185+
decoder, encoder_optimizer, decoder_optimizer,
186+
criterion, max_length)
187+
print_loss_total += loss
188+
plot_loss_total += loss
189+
190+
if iter % print_every == 0:
191+
print_loss_avg = print_loss_total / print_every
192+
print_loss_total = 0
193+
print('%s (%d %d%%) %.4f' % (timeSince(start, float(iter) / float(n_iters)),
194+
iter, float(iter) / float(n_iters) * 100, print_loss_avg))
195+
196+
if iter % plot_every == 0:
197+
plot_loss_avg = plot_loss_total / plot_every
198+
plot_losses.append(plot_loss_avg)
199+
plot_loss_total = 0
200+
201+
print("Done!")
202+
showPlot(plot_losses, show_plot)
203+
204+
205+
206+
207+
def get_data(file_name, channel):
208+
data_path = '/home/jeff/Documents/pytorch/ae_ts/data/'
209+
single_data_file = data_path + file_name
210+
211+
raw = mne.io.read_raw_edf(single_data_file, preload=True, stim_channel='auto')
212+
213+
# crop it if data too big
214+
#raw.crop(0,5000)
215+
216+
# pick a channel
217+
#>>> raw.ch_names
218+
#[u'ROC-LOC', u'LOC-ROC', u'F2-F4', u'F4-C4', u'C4-P4', u'P4-O2',
219+
# u'F1-F3', u'F3-C3', u'C3-P3', u'P3-O1', u'C4-A1', u'EMG1-EMG2',
220+
# u'ECG1-ECG2', u'TERMISTORE', u'TORACE', u'ADDOME', u'Dx1-DX2',
221+
# u'SX1-SX2', u'Posizione', u'HR', u'SpO2']
222+
223+
# Can only pick one channel at a time with this setup
224+
raw.pick_channels([channel])
225+
226+
# this makes a numpy array
227+
# use data.shape to get shape
228+
data = raw.get_data()
229+
230+
data_points_per_second = raw.n_times / raw.times.max()
231+
232+
# free memory
233+
del raw
234+
235+
return data, data_points_per_second
236+
237+
238+
def pairs_from_slices(slices):
239+
#The expected (target) variable should NOT require gradient
240+
if use_cuda:
241+
data_pairs = [(Variable(slices[i], requires_grad=True).cuda(),
242+
Variable(slices[i], requires_grad=False).cuda())
243+
for i in range(0,len(slices))]
244+
else:
245+
data_pairs = [(Variable(slices[i], requires_grad=True),
246+
Variable(slices[i], requires_grad=False))
247+
for i in range(0,len(slices))]
248+
249+
return data_pairs
250+
251+
def make_simple_data_pairs(data, max_length):
252+
# Use a tensor
253+
tdata = torch.from_numpy(data)
254+
255+
# Convert from double to float so stuff works
256+
tdata = tdata.float()
257+
258+
#if use_cuda:
259+
# tdata = tdata.cuda()
260+
261+
# Since we only have one sample, if we had more we would have them be the other axis
262+
# samples x channels x data
263+
# the other way of doing it is by using sequences, as we do in the next function
264+
tdata = tdata.unsqueeze(0)
265+
266+
# make data more human readable,
267+
# (almost all) data point x is -1 < x < 1
268+
tdata = tdata * 10000
269+
270+
var = Variable(tdata, requires_grad=True)
271+
272+
slices = torch.split(tdata, max_length, 2)
273+
slices = slices[:-1] #last item may not match expected length of MAX_LENGTH
274+
275+
return pairs_from_slices(slices)
276+
277+
def make_sequenced_data_pairs(data, max_length, sequence_count = 30):
278+
#split the data into MAX_LENGTH log chunks
279+
split_sections = int(data.size / max_length)
280+
data = np.stack(np.array_split(data,split_sections,1)[:-1])
281+
282+
tdata = torch.from_numpy(data)
283+
tdata = tdata.float()
284+
285+
# make data more human readable,
286+
# (almost all) data point x is -1 < x < 1
287+
tdata = tdata * 10000
288+
289+
# sequence count (number of 1 second chunks in a sequence)
290+
slices = torch.split(tdata, sequence_count, 0)
291+
slices = slices[:-1]
292+
293+
return pairs_from_slices(slices)
294+
295+
296+
297+
data, data_points_per_second = get_data('n1.edf', 'F2-F4')
298+
#MAX_LENGTH is the length of a section of data we are observing (feeding into the alg)
299+
MAX_LENGTH = int(data_points_per_second / 4)
300+
#MAX_LENGTH = 512
301+
302+
# Hidden layer size will be a percentage of the data size
303+
hidden_layer_percentage = 0.1
304+
hidden_size = int(MAX_LENGTH * hidden_layer_percentage)
305+
306+
#SC = 30
307+
SC = 4
308+
309+
def run_seq_single(): #used just to see if encoder/decoder is working
310+
pairs = make_sequenced_data_pairs(data, MAX_LENGTH)
311+
return run_train_only(pairs, hidden_size, MAX_LENGTH)
312+
313+
def run_seq():
314+
pairs = make_sequenced_data_pairs(data, MAX_LENGTH, SC)
315+
return run_train_iters(pairs, hidden_size, MAX_LENGTH)
316+
317+
def run_single(): #used just to see if encoder/decoder is working
318+
pairs = make_simple_data_pairs(data, MAX_LENGTH)
319+
return run_train_only(pairs, hidden_size, MAX_LENGTH)
320+
321+
def run():
322+
pairs = make_simple_data_pairs(data, MAX_LENGTH)
323+
return run_train_iters(pairs, hidden_size, MAX_LENGTH)
324+
325+
326+
327+
def setup_encoder_decoder(hidden_size, max_length):
328+
encoder1 = EncoderRNN(MAX_LENGTH, hidden_size)
329+
decoder1 = DecoderRNN(hidden_size, MAX_LENGTH)
330+
if use_cuda:
331+
encoder1 = encoder1.cuda()
332+
decoder1 = decoder1.cuda()
333+
334+
criterion1 = nn.L1Loss()
335+
return encoder1, decoder1, criterion1
336+
337+
def run_train_only(data_pairs, hidden_size, max_length):
338+
encoder1, decoder1, criterion1 = setup_encoder_decoder(hidden_size, max_length)
339+
return train(data_pairs[0][0], data_pairs[0][1],
340+
encoder1, decoder1,
341+
None, None,
342+
criterion1, max_length)
343+
344+
def run_train_iters(data_pairs, hidden_size, max_length):
345+
encoder1, decoder1, criterion1 = setup_encoder_decoder(hidden_size, max_length)
346+
return trainIters(encoder1, decoder1, criterion1,
347+
data_pairs, n_iters=len(data_pairs)*5,
348+
print_every=50, plot_every=10, show_plot=True,
349+
learning_rate=0.01, max_length=max_length)

0 commit comments

Comments
 (0)