Skip to content

Commit eaa6fbe

Browse files
authored
new files
1 parent e2410ff commit eaa6fbe

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

get_amino_acid_pos.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Thu Jun 3 18:20:54 2021
5+
6+
@author: rahinavelkar
7+
"""
8+
9+
# The script takes single or multiple FASTA sequences and prints each desired amino acid and its respective position in the sequence
10+
# To match a single amino acid residue set aa = 'S'; for multiple amino acid residues use aa = 'S|T|K'
11+
# Generates a space-separated file with accession, matched amino acids, and positions of the matched amino acids
12+
13+
import re
14+
15+
infile = "/Users/rahinavelkar/Desktop/scripts/input.fasta"
outfile = "/Users/rahinavelkar/Desktop/scripts/get_aa_pos_biopython.txt"
# Regular-expression alternation of the residues to report, e.g. 'S' or 'S|T|K'.
aa = 'K|T|S'


def find_residues(sequence, residues):
    """Return the residues matched in *sequence* and their positions.

    Args:
        sequence: The sequence to scan, as a plain string.
        residues: A regular-expression pattern such as ``'S'`` or
            ``'K|T|S'``; it is passed unchanged to ``re.finditer``.

    Returns:
        A ``(matched, positions)`` tuple of two equally long lists.
        ``matched`` holds each matched substring in order of occurrence and
        ``positions`` holds the corresponding 1-based start positions as
        strings. Both lists are empty when nothing matches.
    """
    matched = []
    positions = []
    for match in re.finditer(residues, sequence):
        matched.append(match.group())
        # re reports 0-based offsets; the output file uses 1-based positions.
        positions.append(str(match.start() + 1))
    return matched, positions


def main():
    """Write one line per FASTA record: id, matched residues, positions.

    Reads ``infile``, looks for the residues described by ``aa`` in every
    record, and appends a space-separated line to ``outfile`` whose second and
    third fields are the ``|``-joined residues and their 1-based positions.
    """
    # Imported here so find_residues stays importable without Biopython.
    from Bio import SeqIO

    # Keyed by record id, so a repeated id keeps only its last sequence.
    fasta = {}
    for seq_record in SeqIO.parse(infile, "fasta"):
        fasta[seq_record.id] = seq_record.seq

    # Append mode: lines written by earlier runs are kept. The context manager
    # closes the handle even if writing a record fails.
    with open(outfile, 'a') as source_file:
        for name, seq in fasta.items():
            # Fresh lists per record, so a line never carries matches that
            # belong to previously processed sequences.
            amino_acid, pos = find_residues(str(seq), aa)
            print(name, "|".join(amino_acid), "|".join(pos), file=source_file)

    print('Done')


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)