-
Notifications
You must be signed in to change notification settings - Fork 64
Expand file tree
/
Copy pathAssignment_07E.py
More file actions
74 lines (54 loc) · 2.87 KB
/
Assignment_07E.py
File metadata and controls
74 lines (54 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
'''
A solution to a programming assignment for the Bioinformatics Algorithms (Part 1) on Coursera.
The associated textbook is Bioinformatics Algorithms: An Active-Learning Approach by Phillip Compeau & Pavel Pevzner.
The course is run on Coursera and the assignments and textbook are hosted on Stepic
Problem Title: Middle Edge in Linear Space Problem
Assignment #: 07
Problem ID: E
URL: https://stepic.org/Bioinformatics-Algorithms-2/SpaceEfficient-Sequence-Alignment-250/step/12
'''
def middle_column_score(v, w, scoring_matrix, sigma):
'''Returns the score of the middle column for the alignment of v and w.'''
# Initialize the score columns.
S = [[i*j*sigma for j in xrange(-1, 1)] for i in xrange(len(v)+1)]
S[0][1] = -sigma
backtrack = [0]*(len(v)+1)
# Fill in the Score and Backtrack matrices.
for j in xrange(1, len(w)/2+1):
for i in xrange(0, len(v)+1):
if i == 0:
S[i][1] = -j*sigma
else:
scores = [S[i-1][0] + scoring_matrix[v[i-1], w[j-1]], S[i][0] - sigma, S[i-1][1] - sigma]
S[i][1] = max(scores)
backtrack[i] = scores.index(S[i][1])
if j != len(w)/2:
S = [[row[1]]*2 for row in S]
return [row[1] for row in S], backtrack
def middle_edge(v, w, scoring_matrix, sigma):
'''Returns the middle edge in the alignment graph of v and w.'''
# Get the score of the middle column from the source to the middle. The backtrack matrix is unnecessary here.
source_to_middle = middle_column_score(v, w, scoring_matrix, sigma)[0]
# Get the score of the middle column from the middle to sink. Reverse the order as the computations are done in the opposite orientation.
middle_to_sink, backtrack = map(lambda l: l[::-1], middle_column_score(v[::-1], w[::-1]+['', '$'][len(w) % 2 == 1 and len(w) > 1], scoring_matrix, sigma))
# Get the componentwise sum of the middle column scores.
scores = map(sum, zip(source_to_middle, middle_to_sink))
# Get the position of the maximum score and the next node.
max_middle = max(xrange(len(scores)), key=lambda i: scores[i])
if max_middle == len(scores) - 1:
next_node = (max_middle, len(w)/2 + 1)
else:
next_node = [(max_middle + 1, len(w)/2 + 1), (max_middle, len(w)/2 + 1), (max_middle + 1, len(w)/2),][backtrack[max_middle]]
return (max_middle, len(w)/2), next_node
if __name__ == '__main__':
from scripts import BLOSUM62
# Read the input data.
with open('data/stepic_7e.txt') as input_data:
word1, word2 = [line.strip() for line in input_data.readlines()]
# Get the middle edge.
middle = middle_edge(word1, word2, BLOSUM62(), 5)
# Print and save the answer.
print ' '.join(map(str, middle))
with open('output/Assignment_07E.txt', 'w') as output_data:
output_data.write(' '.join(map(str, middle)))