-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscript-coverage.py
executable file
·67 lines (63 loc) · 2.37 KB
/
transcript-coverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
import sys
current_tid = None
current_tlen = 0
last_start = 0
last_end = 0
seg_lengths = []
seg_covs = []
for line in sys.stdin:
(tid,tstart,tend,tlen,
seqid,seqstart,seqend,seqlen,
pident,length,evalue) = line.split()
tstart = int(tstart)
tend = int(tend)
tlen = int(tlen)
seqstart = int(seqstart)
seqend = int(seqend)
seqlen = int(seqlen)
if tid != current_tid:
if current_tid is not None:
sys.stdout.write("{:s}\t{:d}\t{:d}\t{:f}\t{:f}\n"
.format(current_tid,current_tlen,
sum(seg_lengths),
float(sum(seg_lengths))/
current_tlen,
sum(seg_covs)/len(seg_covs)))
current_tid = tid
current_tlen = tlen
last_start = tstart
last_end = tend
seg_lengths = [last_end - last_start + 1]
seg_covs = [1.0]
else:
if tstart < last_start:
sys.exit("Input file not sorted by transcript start!")
if tstart <= last_end:
if tend > last_end:
# extend segment
seg_lengths[-1] = tend - last_start + 1
seg_covs[-1] = ( seg_covs[-1] * (tstart-last_start)
+ (seg_covs[-1]+1) * (last_end-tstart+1)
+ 1 * (tend-last_end)) \
/ seg_lengths[-1]
last_end = tend
else:
# embed segment
seg_covs[-1] = ( seg_covs[-1] * (tstart-last_start)
+ (seg_covs[-1]+1) * (tend-tstart+1)
+ seg_covs[-1] * (last_end-tend)) \
/ seg_lengths[-1]
else:
# new segment
last_start = tstart
last_end = tlen
seg_lengths.append(last_end - last_start + 1)
seg_covs.append(1.0)
if current_tid is not None:
sys.stdout.write("{:s}\t{:d}\t{:d}\t{:f}\t{:f}\n"
.format(current_tid,current_tlen,
sum(seg_lengths),
float(sum(seg_lengths))/
current_tlen,
sum(seg_covs)/len(seg_covs)))