Skip to content

Commit 4a56d6d

Browse files
author
Andre Kahles
committed
Unified Matlab and Python visualization
* Matlab will use the python visualization engine. This commit prepares future developments for spladder_viz. Functions loading data in a matlab or python specific manner have been moved into an identity module.
1 parent 2a619e4 commit 4a56d6d

File tree

12 files changed

+477
-61
lines changed

12 files changed

+477
-61
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
*.bam
33
*.bai
44
*.mexa64
5+
.history

matlab/modules/classes/__init__.py

Whitespace-only changes.

matlab/modules/classes/event.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import scipy as sp
2+
3+
class Event:
4+
5+
def __init__(self, event_type, chr=None, strand=None):
6+
7+
self.event_type = event_type
8+
self.chr = chr
9+
self.strand = strand
10+
self.strain = ''
11+
self.exons1 = sp.zeros((0, 2), dtype = 'int')
12+
self.exons2 = sp.zeros((0, 2), dtype = 'int')
13+
self.exons1_col = sp.zeros((2, 0), dtype = 'int')
14+
self.exons2_col = sp.zeros((2, 0), dtype = 'int')
15+
self.p_values = None
16+
self.gene_name = None
17+
self.transcript_type = None
18+
self.num_detected = None
19+
self.id = None
20+
self.detected = None
21+
self.verified = []
22+
self.num_verfied = 0
23+
self.confirmed = []
24+
25+
def get_len(self, trafo=False):
26+
27+
if trafo:
28+
return max(self.exons1_col.max(), self.exons2_col.max()) - min(self.exons1_col.min(), self.exons2_col.min())
29+
else:
30+
return max(self.exons1.max(), self.exons2.max()) - min(self.exons1.min(), self.exons2.min())
31+
32+
def get_inner_coords(self, trafo=False):
33+
34+
if self.event_type == 'mult_exon_skip':
35+
if trafo:
36+
return sp.sort(sp.unique(sp.r_[sp.sort(self.exons2_col.ravel())[1:4], sp.sort(self.exons2_col.ravel())[-4:-1]]))
37+
#return sp.unique(self.exons2_col.ravel())[1:-1]
38+
else:
39+
return sp.sort(sp.unique(sp.r_[sp.sort(self.exons2.ravel())[1:4], sp.sort(self.exons2.ravel())[-4:-1]]))
40+
#return sp.unique(self.exons2.ravel())[1:-1]
41+
elif self.event_type == 'mutex_exons':
42+
if trafo:
43+
return sp.sort(sp.r_[self.exons1_col.ravel()[1:4], self.exons2_col[1, :], self.exons1_col[2, 0]])
44+
else:
45+
return sp.sort(sp.r_[self.exons1.ravel()[1:4], self.exons2[1, :], self.exons1[2, 0]])
46+
else:
47+
if trafo:
48+
return sp.sort(sp.unique(sp.r_[sp.sort(self.exons1_col.ravel())[1:-1], sp.sort(self.exons2_col.ravel())[1:-1]]))
49+
else:
50+
return sp.sort(sp.unique(sp.r_[sp.sort(self.exons1.ravel())[1:-1], sp.sort(self.exons2.ravel())[1:-1]]))
51+
52+
53+
54+
def get_coords(self, trafo=False):
55+
56+
if self.event_type != 'mult_exon_skip':
57+
if trafo:
58+
#return sp.sort(sp.unique(sp.c_[self.exons1_col.ravel(), self.exons2_col.ravel()]))
59+
return sp.sort(sp.r_[self.exons1_col.ravel(), self.exons2_col.ravel()])
60+
else:
61+
#return sp.sort(sp.unique(sp.c_[self.exons1.ravel(), self.exons2.ravel()]))
62+
return sp.sort(sp.r_[self.exons1.ravel(), self.exons2.ravel()])
63+
else:
64+
if trafo:
65+
return sp.sort(sp.r_[self.exons1_col.ravel()[:4], self.exons2_col.ravel()[-4:]])
66+
else:
67+
return sp.sort(sp.r_[self.exons1.ravel()[:4], self.exons2.ravel()[-4:]])
68+
69+

matlab/modules/gene.py renamed to matlab/modules/classes/gene.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import scipy as sp
22

3-
from .utils import *
3+
from ..utils import *
4+
from .segmentgraph import Segmentgraph
5+
from .splicegraph import Splicegraph
46

57
class Gene:
68

@@ -13,9 +15,9 @@ def __init__(self, name=None, start=None, stop=None, chr=None, strand=None, sour
1315
self.strand = strand
1416
self.transcripts = []
1517
self.source = source
16-
self.splicegraph = None
17-
self.segmentgraph = None
18-
self.gene_type=gene_type
18+
self.splicegraph = Splicegraph()
19+
self.segmentgraph = Segmentgraph()
20+
self.gene_type = gene_type
1921
self.is_alt = None
2022
self.is_alt_spliced = None
2123

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import scipy as sp
2+
3+
class Segmentgraph:
4+
5+
def __init__(self, gene = None):
6+
7+
self.segments = sp.zeros((2, 0), dtype='int')
8+
self.seg_match = sp.zeros((0, 0), dtype='bool')
9+
self.seg_edges = sp.zeros((0, 0), dtype='bool')
10+
11+
if gene is not None:
12+
self.from_gene(gene)
13+
14+
def from_gene(self, gene):
15+
16+
sg = gene.splicegraph.vertices
17+
breakpoints = sp.unique(sg.ravel())
18+
self.segments = sp.zeros((2, 0), dtype='int')
19+
for j in range(1, breakpoints.shape[0]):
20+
s = sp.sum(sg[0, :] < breakpoints[j])
21+
e = sp.sum(sg[1, :] < breakpoints[j])
22+
if s > e:
23+
self.segments = sp.c_[self.segments, [breakpoints[j-1], breakpoints[j]]]
24+
25+
### match nodes to segments
26+
self.seg_match = sp.zeros((0, sg.shape[1]), dtype='bool')
27+
for j in range(sg.shape[1]):
28+
tmp = ((sg[0, j] <= self.segments[0, :]) & (sg[1, j] >= self.segments[1, :]))
29+
if self.seg_match.shape[0] == 0:
30+
self.seg_match = tmp.copy().reshape((1, tmp.shape[0]))
31+
else:
32+
self.seg_match = sp.r_[self.seg_match, tmp.reshape((1, tmp.shape[0]))]
33+
34+
### create edge graph between segments
35+
self.seg_edges = sp.zeros((self.segments.shape[1], self.segments.shape[1]), dtype='bool')
36+
k, l = sp.where(sp.triu(gene.splicegraph.edges))
37+
for m in range(k.shape[0]):
38+
### donor segment
39+
d = sp.where(self.seg_match[k[m], :])[0][-1]
40+
### acceptor segment
41+
a = sp.where(self.seg_match[l[m], :])[0][0]
42+
self.seg_edges[d, a] = True
43+
44+

matlab/modules/classes/splicegraph.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import scipy as sp
2+
3+
from ..utils import *
4+
5+
import pdb
6+
7+
class Splicegraph:
8+
9+
def __init__(self, gene = None):
10+
11+
self.vertices = sp.zeros((2, 0), dtype='int')
12+
self.edges = sp.zeros((0, 0), dtype='int')
13+
self.terminals = sp.zeros((2, 0), dtype='int')
14+
15+
if gene:
16+
self.from_gene(gene)
17+
18+
def get_len(self):
19+
20+
return self.vertices.shape[1]
21+
22+
def new_edge(self):
23+
24+
self.edges = sp.c_[self.edges, sp.zeros((self.edges.shape[0], 1), dtype='int')]
25+
self.edges = sp.r_[self.edges, sp.zeros((1, self.edges.shape[1]), dtype='int')]
26+
27+
def subset(self, keep_idx):
28+
29+
self.vertices = self.vertices[:, keep_idx]
30+
self.edges = self.edges[keep_idx, :][:, keep_idx]
31+
self.terminals = self.terminals[:, keep_idx]
32+
33+
def update_terminals(self):
34+
35+
self.terminals = sp.zeros(self.vertices.shape, dtype='int')
36+
self.terminals[0, sp.where(sp.sum(sp.tril(self.edges), axis=1) == 0)[0]] = 1
37+
self.terminals[1, sp.where(sp.sum(sp.triu(self.edges), axis=1) == 0)[0]] = 1
38+
39+
def reorder(self, idx):
40+
41+
self.vertices = self.vertices[:, idx]
42+
self.edges = self.edges[idx, :][:, idx]
43+
self.terminals = self.terminals[:, idx]
44+
45+
def sort(self):
46+
47+
s_idx = sp.lexsort([self.vertices[1, :], self.vertices[0, :]])
48+
self.reorder(s_idx)
49+
50+
def from_gene(self, gene):
51+
52+
for transcript_idx in range(len(gene.transcripts)):
53+
exon_start_end = gene.exons[transcript_idx]
54+
55+
### only one exon in the transcript
56+
if exon_start_end.shape[0] == 1:
57+
exon1_start = exon_start_end[0, 0]
58+
exon1_end = exon_start_end[0, 1]
59+
60+
if self.vertices.shape[1] == 0:
61+
self.vertices = sp.array([[exon1_start], [exon1_end]], dtype='int')
62+
self.edges = sp.array([[0]], dtype='int')
63+
else:
64+
self.vertices = sp.c_[self.vertices, [exon1_start, exon1_end]]
65+
self.new_edge()
66+
### more than one exon in the transcript
67+
else:
68+
for exon_idx in xrange(exon_start_end.shape[0] - 1):
69+
exon1_start = exon_start_end[exon_idx , 0]
70+
exon1_end = exon_start_end[exon_idx, 1]
71+
exon2_start = exon_start_end[exon_idx + 1, 0]
72+
exon2_end = exon_start_end[exon_idx + 1, 1]
73+
74+
if self.vertices.shape[1] == 0:
75+
self.vertices = sp.array([[exon1_start, exon2_start], [exon1_end, exon2_end]], dtype='int')
76+
self.edges = sp.array([[0, 1], [1, 0]], dtype='int')
77+
else:
78+
exon1_idx = -1
79+
exon2_idx = -1
80+
### check if current exon already occurred
81+
for idx in range(self.vertices.shape[1]):
82+
if ((self.vertices[0, idx] == exon1_start) and (self.vertices[1, idx] == exon1_end)):
83+
exon1_idx = idx
84+
if ((self.vertices[0, idx] == exon2_start) and (self.vertices[1, idx] == exon2_end)):
85+
exon2_idx = idx
86+
87+
### both exons already occured -> only add an edge
88+
if (exon1_idx != -1) and (exon2_idx != -1):
89+
self.edges[exon1_idx, exon2_idx] = 1
90+
self.edges[exon2_idx, exon1_idx] = 1
91+
else:
92+
### 2nd exon occured
93+
if ((exon1_idx == -1) and (exon2_idx != -1)):
94+
self.vertices = sp.c_[self.vertices, [exon1_start, exon1_end]]
95+
self.new_edge()
96+
self.edges[exon2_idx, -1] = 1
97+
self.edges[-1, exon2_idx] = 1
98+
### 1st exon occured
99+
elif ((exon2_idx == -1) and (exon1_idx != -1)):
100+
self.vertices = sp.c_[self.vertices, [exon2_start, exon2_end]]
101+
self.new_edge()
102+
self.edges[exon1_idx, -1] = 1
103+
self.edges[-1, exon1_idx] = 1
104+
### no exon occured
105+
else:
106+
assert((exon1_idx == -1) and (exon2_idx == -1))
107+
self.vertices = sp.c_[self.vertices, [exon1_start, exon1_end]]
108+
self.vertices = sp.c_[self.vertices, [exon2_start, exon2_end]]
109+
self.new_edge()
110+
self.new_edge()
111+
self.edges[-2, -1] = 1
112+
self.edges[-1, -2] = 1
113+
114+
### take care of the sorting by exon start
115+
s_idx = sp.argsort(self.vertices[0, :])
116+
self.vertices = self.vertices[:, s_idx]
117+
self.edges = self.edges[s_idx, :][:, s_idx]
118+
self.terminals = sp.zeros(self.vertices.shape, dtype='int')
119+
self.terminals[0, sp.where(sp.tril(self.edges).sum(axis=1) == 0)[0]] = 1
120+
self.terminals[1, sp.where(sp.triu(self.edges).sum(axis=1) == 0)[0]] = 1
121+
122+
123+
def from_matfile(self, mat_struct):
124+
"""generates a splicing graph structure from a matfile structure"""
125+
126+
self.vertices = mat_struct['splicegraph'][0, 0].astype('int')
127+
self.edges = mat_struct['splicegraph'][0, 1].astype('int')
128+
self.terminals = mat_struct['splicegraph'][0, 2].astype('int')
129+
130+
131+
def add_intron(self, idx1, flag1, idx2, flag2):
132+
"""adds new introns into splicegraph between idx1 and idx2"""
133+
134+
### if flag1, all end terminal exons in idx1 are preserved
135+
### if flag2, all start terminal exons in idx2 are preserved
136+
137+
if idx2.shape[0] > 0:
138+
adj_mat = sp.triu(self.edges)
139+
140+
if flag1:
141+
for i1 in idx1:
142+
143+
### if exon is end-terminal
144+
if sp.all(adj_mat[i1, :] == 0):
145+
146+
self.vertices = sp.c_[self.vertices, self.vertices[:, i1]]
147+
148+
self.new_edge()
149+
self.edges[:, -1] = self.edges[:, i1]
150+
self.edges[-1, :] = self.edges[i1, :]
151+
152+
self.terminals = sp.c_[self.terminals, self.terminals[:, i1]]
153+
if flag2:
154+
for i2 in idx2:
155+
### if exon is start-terminal
156+
if sp.all(adj_mat[:, i2] == 0):
157+
self.vertices = sp.c_[self.vertices, self.vertices[:, i2]]
158+
159+
self.new_edge()
160+
self.edges[:, -1] = self.edges[:, i2]
161+
self.edges[-1, :] = self.edges[i2, :]
162+
163+
self.terminals = sp.c_[self.terminals, self.terminals[:, i2]]
164+
165+
for i1 in idx1:
166+
for i2 in idx2:
167+
self.edges[i1, i2] = 1
168+
self.edges[i2, i1] = 1
169+
170+
self.uniquify()
171+
172+
def add_cassette_exon(self, new_exon, exons_pre, exons_aft):
173+
### exon_pre contains the indices of preceding exons
174+
### exon_aft contains the indices of successing exons
175+
176+
self.vertices = sp.c_[self.vertices, new_exon]
177+
178+
self.new_edge()
179+
180+
self.edges[exons_pre, -1] = 1
181+
self.edges[exons_aft, -1] = 1
182+
self.edges[-1, :] = self.edges[:, -1].T
183+
184+
self.terminals = sp.c_[self.terminals, sp.zeros((2,), dtype='int')]
185+
186+
187+
def add_intron_retention(self, idx1, idx2):
188+
189+
adj_mat = sp.triu(self.edges)
190+
191+
self.vertices = sp.c_[self.vertices, sp.array([self.vertices[0, idx1], self.vertices[1, idx2]], dtype='int')]
192+
193+
self.new_edge()
194+
195+
adj_mat = sp.r_[adj_mat, sp.zeros((1, adj_mat.shape[1]), dtype='int')]
196+
adj_mat = sp.c_[adj_mat, sp.zeros((adj_mat.shape[0], 1), dtype='int')]
197+
198+
### check if adjacency matrix is symmetric
199+
### otherwise or is not justyfied
200+
assert(sp.all(sp.all(adj_mat - (self.edges - adj_mat).T == 0)))
201+
202+
### AK: under the assumption that our splice graph representation is symmetric
203+
### I preserve symmetry by using OR over the adj_mat column and row
204+
205+
self.edges[:, -1] = adj_mat[:, idx1] | adj_mat[idx2, :].T
206+
self.edges[-1, :] = adj_mat[:, idx1].T | adj_mat[idx2, :]
207+
208+
self.terminals = sp.c_[self.terminals, sp.array([self.terminals[0, idx1], self.terminals[1, idx2]], dtype='int')]
209+
210+
def uniquify(self):
211+
# OUTPUT: splice graph that has been made unique on exons for each gene
212+
213+
self.sort()
214+
(s_tmp, s_idx) = sort_rows(self.vertices.T, index=True)
215+
self.vertices = s_tmp.T
216+
self.edges = self.edges[s_idx, :][:, s_idx]
217+
self.terminals = self.terminals[:, s_idx]
218+
219+
rm_idx = []
220+
for j in range(1, self.vertices.shape[1]):
221+
if sp.all(self.vertices[:, j-1] == self.vertices[:, j]):
222+
self.edges[:, j] = self.edges[:, j-1] | self.edges[:, j]
223+
self.edges[j, :] = self.edges[j-1, :] | self.edges[j, :]
224+
rm_idx.append(j - 1)
225+
226+
keep_idx = sp.where(~sp.in1d(sp.array(range(self.vertices.shape[1])), rm_idx))[0]
227+
self.vertices = self.vertices[:, keep_idx]
228+
self.edges = self.edges[keep_idx, :][:, keep_idx]
229+
self.terminals = self.terminals[:, keep_idx]
230+

0 commit comments

Comments
 (0)