Skip to content

Commit e9319d8

Browse files
authored
Add files via upload
1 parent 5629798 commit e9319d8

File tree

1 file changed

+387
-0
lines changed
  • UCSD - Biology Meets Programming Bioinformatics

1 file changed

+387
-0
lines changed
Lines changed: 387 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,387 @@
1+
"""
2+
@author: salimt
3+
"""
4+
5+
import random
6+
7+
"""
8+
The functions in Motif.py will return 0 for an entire motif probability even if only
9+
one of the positions has a 0 probability of existing in the consensus string.
10+
11+
This doesn't make sense because a motif that differs from the consensus string
12+
at every position will also get a total probability of 0.
13+
14+
In order to improve this unfair scoring, bioinformaticians often substitute zeroes
15+
with small numbers called pseudocounts.
16+
"""
17+
# Input: String Text, an integer k, and profile matrix Profile
18+
# Output: String of most probable pattern
19+
def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of ``text`` that is most probable under ``profile``.

    Every window of length ``k`` is scored with ``Pr``; on ties the leftmost
    window wins because a later window must be strictly better to replace it.
    """
    best_kmer = text[:k]
    best_prob = Pr(best_kmer, profile)
    for start in range(1, len(text) - k + 1):
        window = text[start:start + k]
        window_prob = Pr(window, profile)
        if window_prob > best_prob:
            best_kmer, best_prob = window, window_prob
    return best_kmer
28+
29+
# Input: A set of kmers Motifs
30+
# Output: CountWithPseudocounts(Motifs)
31+
def CountWithPseudocounts(Motifs):
    """Return per-position nucleotide counts of ``Motifs``, each increased by 1.

    The result maps each of "A", "C", "G", "T" to a list with one entry per
    motif position; the extra 1 is the pseudocount that keeps every entry
    strictly positive.
    """
    width = len(Motifs[0])
    # Start every cell at 1 (the pseudocount) instead of adding it afterwards.
    counts = {symbol: [1] * width for symbol in "ACGT"}
    for pos in range(width):
        for motif in Motifs:
            row = counts.get(motif[pos])
            row[pos] += 1
    return counts
40+
41+
"""
42+
ProfileWithPseudocounts(Motifs) that takes a list of strings Motifs as input and
43+
returns the profile matrix of Motifs with pseudocounts as a dictionary of lists
44+
"""
45+
46+
# Input: A set of kmers Motifs
47+
# Output: ProfileWithPseudocounts(Motifs)
48+
def ProfileWithPseudocounts(Motifs):
    """Return the profile matrix of ``Motifs`` built from pseudocounted counts.

    Each pseudocounted count is divided by ``len(Motifs) + 4``: the number of
    motifs plus one pseudocount for each of the four nucleotides, so every
    column of the profile sums to 1.
    """
    denominator = len(Motifs) + 4
    counts = CountWithPseudocounts(Motifs)
    return {
        symbol: [count / denominator for count in row]
        for symbol, row in counts.items()
    }
56+
57+
# motif1 = "AACGTA"
58+
# motif2 = "CCCGTT"
59+
# motif3 = "CACCTT"
60+
# motif4 = "GGATTA"
61+
# motif5 = "TTCCGG"
62+
# motifs = [motif1, motif2, motif3, motif4, motif5]
63+
#
64+
# print(ProfileWithPseudocounts(motifs))
65+
66+
"""
67+
Write a function GreedyMotifSearchWithPseudocounts(Dna, k, t) that takes a list
68+
of strings Dna followed by integers k and t and returns the result of running
69+
GreedyMotifSearch, where each profile matrix is generated with pseudocounts
70+
"""
71+
# Input: A list of strings Dna, and integers k and t (where t is the number of strings in Dna)
72+
# Output: GreedyMotifSearch(Dna, k, t)
73+
def GreedyMotifSearchWithPseudocounts(Dna, k, t):
    """Greedy motif search in which every profile is built with pseudocounts.

    For each k-mer of the first string, a motif set is grown one string at a
    time by taking the profile-most-probable k-mer of the next string; the
    lowest-scoring set seen is returned. The initial best set is the first
    k-mer of each of the first ``t`` strings.
    """
    BestMotifs = [Dna[row][:k] for row in range(t)]
    for start in range(len(Dna[0]) - k + 1):
        candidate = [Dna[0][start:start + k]]
        for row in range(1, t):
            # At this point candidate holds exactly `row` motifs.
            row_profile = ProfileWithPseudocounts(candidate)
            candidate.append(ProfileMostProbableKmer(Dna[row], k, row_profile))
        if Score(candidate) < Score(BestMotifs):
            BestMotifs = candidate
    return BestMotifs
84+
85+
# Input: A set of kmers Motifs
86+
# Output: A consensus string of Motifs.
87+
def Consensus(Motifs):
    """Return the consensus string of ``Motifs``.

    At each position the nucleotide with the highest (pseudocounted) count is
    chosen; ties go to the first nucleotide in the count table's key order.
    """
    counts = CountWithPseudocounts(Motifs)
    # max() keeps the first maximal key, matching a strict ">" scan.
    return "".join(
        max(counts, key=lambda symbol: counts[symbol][pos])
        for pos in range(len(Motifs[0]))
    )
100+
101+
# Input: A set of k-mers Motifs
102+
# Output: The score of these k-mers.
103+
def Score(Motifs):
    """Return the score of ``Motifs``: total mismatches against the consensus.

    For every column the score grows by the number of motifs whose symbol
    differs from that column's most frequent symbol, so a perfectly
    conserved set scores 0.

    Input:  a list of equal-length k-mers (an empty list scores 0).
    Output: a non-negative integer.
    """
    t = len(Motifs)
    total = 0
    # Count from the raw motifs rather than from CountWithPseudocounts:
    # pseudocounted counts would lower every column's mismatches by one and
    # make the score of a perfectly conserved set negative.
    for column in zip(*Motifs):
        most_common = max(column.count(symbol) for symbol in set(column))
        total += t - most_common
    return total
108+
109+
# Input: String Text and profile matrix Profile
110+
# Output: Probability value
111+
def Pr(Text, Profile):
    """Return the probability of ``Text`` under the profile matrix ``Profile``.

    The result is the product, over positions i, of the profile entry for the
    symbol ``Text[i]`` at column i; an empty ``Text`` yields 1.
    """
    probability = 1
    for position, symbol in enumerate(Text):
        probability *= Profile.get(symbol)[position]
    return probability
117+
118+
# k = 3
119+
# t = 5
120+
# Dna = ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"]
121+
# print(GreedyMotifSearchWithPseudocounts(Dna, k, t))
122+
123+
# Input: A profile matrix Profile and a list of strings Dna
124+
# Output: Profile-most probable k-mer from each row of Dna
125+
def Motifs(Profile,k, Dna):
    """Return the Profile-most probable k-mer from each string of ``Dna``."""
    chosen = []
    for sequence in Dna:
        chosen.append(ProfileMostProbableKmer(sequence, k, Profile))
    return chosen
127+
128+
# Profile = {'A': [0.8, 0.0, 0.0, 0.2],
129+
# 'C': [0.0, 0.6, 0.2, 0.0],
130+
# 'G': [0.2, 0.2, 0.8, 0.0],
131+
# 'T': [0.0, 0.2, 0.0, 0.8]}
132+
#
133+
# Dnas = ["TTACCTTAAC", "GATGTCTGTC", "ACGGCGTTAG", "CCCTAACGAG", "CGTCAGAGGT"]
134+
#
135+
# print(Motifs(Profile, Dnas))
136+
137+
# Input: A list of strings Dna, and integers k and t
138+
# Output: RandomMotifs(Dna, k, t)
139+
# HINT: You might not actually need to use t since t = len(Dna), but you may find it convenient
140+
def RandomMotifs(Dna, k, t):
    """Return one uniformly chosen k-mer from every string in ``Dna``.

    Input:  a list of strings Dna and integers k and t. ``t`` is accepted for
            signature compatibility only; every string of Dna is sampled.
    Output: a list with one k-mer per string, in the order of Dna.
    Raises: ValueError (from random.randint) if a string is shorter than k.
    """
    motifs = []
    for dna in Dna:
        # Valid start offsets are 0 .. len(dna) - k inclusive; the bound comes
        # from the string length, not from the number of strings.
        start = random.randint(0, len(dna) - k)
        motifs.append(dna[start:start + k])
    return motifs
146+
#
147+
# Dnas = ["TTACCTTAAC", "GATGTCTGTC", "ACGGCGTTAG", "CCCTAACGAG", "CGTCAGAGGT"]
148+
# k = 3
149+
# t = len(Dnas)
150+
# print(RandomMotifs(Dnas, k, t))
151+
152+
# Input: A list of strings Dna, followed by positive integers k and t
153+
# Output: The best-scoring list of k-mer motifs (one per string in Dna) found by the search
154+
def RandomizedMotifSearch(Dna, k, t):
    """Run one randomized motif search and return the best motif set found.

    Starting from random k-mers, the motifs are repeatedly replaced by the
    profile-most-probable k-mers of their own pseudocounted profile. The
    search stops the first time an iteration fails to lower the score.
    """
    current = RandomMotifs(Dna, k, t)
    BestMotifs = current
    improved = True
    while improved:
        current = Motifs(ProfileWithPseudocounts(current), k, Dna)
        improved = Score(current) < Score(BestMotifs)
        if improved:
            BestMotifs = current
    return BestMotifs
164+
165+
#Dna = ["GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC",
166+
#"CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG",
167+
#"ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC",
168+
#"GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC",
169+
#"GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG",
170+
#"CCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA",
171+
#"GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA",
172+
#"GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG",
173+
#"GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG",
174+
#"TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC"]
175+
#
176+
## set t equal to the number of strings in Dna, k equal to 15, and N equal to 100.
177+
#t=len(Dna)
178+
#k=15
179+
#N=100
180+
## Call RandomizedMotifSearch(Dna, k, t) N times, storing the best-scoring set of motifs
181+
## resulting from this algorithm in a variable called BestMotifs
182+
#M = RandomizedMotifSearch(Dna, k, t)
183+
#BestMotifs = M
184+
#for i in range(N):
185+
# if Score(M) < Score(BestMotifs):
186+
# BestMotifs = M
187+
## Print the BestMotifs variable
188+
#print(BestMotifs)
189+
## Print Score(BestMotifs)
190+
#print(Score(BestMotifs))
191+
192+
"""
193+
The function should divide each value in Probabilities by the sum of all values
194+
in Probabilities, then return the resulting dictionary
195+
"""
196+
197+
# Input: A dictionary Probabilities, where keys are k-mers and values are the
198+
# probabilities of these k-mers (which do not necessarily sum up to 1)
199+
# Output: A normalized dictionary where the probability of each k-mer was
200+
# divided by the sum of all k-mers' probabilities
201+
def Normalize(Probabilities):
    """Return a new dict with every value divided by the sum of all values.

    Keys are kept as they are and the input dictionary is not modified.
    """
    total = sum(Probabilities.values())
    normalized = {}
    for kmer, probability in Probabilities.items():
        normalized[kmer] = probability / total
    return normalized
205+
206+
207+
# Probabilities = {'A': 0.15, 'B': 0.6, 'C': 0.225, 'D': 0.225, 'E': 0.3}
208+
# print(Normalize(Probabilities))
209+
210+
"""
211+
This function takes a dictionary Probabilities whose keys are k-mers and whose
212+
values are the probabilities of these k-mers. The function should return a
213+
randomly chosen k-mer key with respect to the values in Probabilities
214+
"""
215+
216+
# Input: A dictionary Probabilities whose keys are k-mers and whose values are the probabilities of these kmers
217+
# Output: A randomly chosen k-mer with respect to the values in Probabilities
218+
def WeightedDie(Probabilities):
    """Return a random key of ``Probabilities``, chosen in proportion to its value.

    Input:  a dictionary whose keys are k-mers and whose values are their
            (not necessarily normalized) non-negative weights.
    Output: one of the keys with positive weight, or None when there is no
            such key (for example an empty dictionary).
    """
    # Draw against the actual total so that weights which do not sum to
    # exactly 1 (float round-off, un-normalized input) still always yield a key.
    total = sum(weight for weight in Probabilities.values() if weight > 0)
    remaining = random.uniform(0, total)
    chosen = None
    for kmer, weight in Probabilities.items():
        if weight <= 0:
            # A zero-weight k-mer must never be selected.
            continue
        chosen = kmer
        remaining -= weight
        if remaining <= 0:
            return kmer
    # Only reached through floating-point round-off: fall back to the last
    # k-mer with positive weight instead of returning None.
    return chosen
224+
225+
# Probabilities = {'AA': 0.2, 'AT': 0.4, 'CC': 0.1, 'GG': 0.1, 'TT': 0.2}
226+
# print(WeightedDie(Probabilities))
227+
228+
"""
229+
Now that we can simulate a weighted die roll over a collection of probabilities
230+
of strings, we need to make this function into a subroutine of a larger function
231+
that randomly chooses a k-mer from a string Text based on a profile matrix profile
232+
"""
233+
234+
# Input: A string Text, a profile matrix Profile, and an integer k
235+
# Output: ProfileGeneratedString(Text, profile, k)
236+
def ProfileGeneratedString(Text, profile, k):
    """Return a k-mer of ``Text`` drawn at random according to ``profile``.

    Input:  a string Text, a profile matrix profile, and an integer k.
    Output: one k-mer of Text, chosen with probability proportional to the
            summed ``Pr`` of all of its occurrences in Text.
    """
    probabilities = {}
    for start in range(len(Text) - k + 1):
        kmer = Text[start:start + k]
        # Accumulate rather than overwrite: a k-mer that occurs at several
        # positions must carry the weight of every occurrence.
        probabilities[kmer] = probabilities.get(kmer, 0) + Pr(kmer, profile)
    probabilities = Normalize(probabilities)
    return WeightedDie(probabilities)
243+
244+
"""
245+
RandomizedMotifSearch may change all t strings in Motifs in a single iteration.
246+
This strategy may prove reckless, since some correct motifs (captured in Motifs)
247+
may potentially be discarded at the next iteration.
248+
249+
GibbsSampler is a more cautious iterative algorithm that discards a single k-mer
250+
from the current set of motifs at each iteration and decides to either keep it
251+
or replace it with a new one.
252+
"""
253+
254+
def GibbsSampler(Dna, k, t, N):
    """Run the Gibbs sampler for ``N`` iterations and return the best motifs seen.

    Input:  a list of strings Dna, the motif length k, the number of strings
            t, and the number of sampling iterations N.
    Output: the lowest-scoring list of k-mers (one per string) encountered,
            including the random starting set.

    Each iteration picks one string at random, builds a pseudocounted profile
    from the motifs of all the OTHER strings, and replaces the chosen string's
    motif with a k-mer drawn from that profile.
    """
    Motifs = RandomMotifs(Dna, k, t)
    # Copy so that later in-place updates of Motifs cannot alter the best set.
    BestMotifs = list(Motifs)
    if t < 2:
        # No other motifs exist to build a profile from, so there is nothing
        # to resample.
        return BestMotifs
    best_score = Score(BestMotifs)
    for _ in range(N):
        i = random.randint(0, t - 1)
        # Leave motif i out so it does not bias its own replacement.
        others = Motifs[:i] + Motifs[i + 1:]
        Profile = ProfileWithPseudocounts(others)
        Motifs[i] = ProfileGeneratedString(Dna[i], Profile, k)
        current_score = Score(Motifs)
        if current_score < best_score:
            BestMotifs = list(Motifs)
            best_score = current_score
    return BestMotifs
265+
266+
#Dna =["GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC", "CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG", "ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC", "GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC", "GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG", "CCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA", "GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA", 
"GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG", "GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG", "TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC"]
267+
#
268+
## set t equal to the number of strings in Dna, k equal to 15, and N equal to 100
269+
#t = len(Dna)
270+
#k = 15
271+
#N = 100
272+
#
273+
#
274+
## Call GibbsSampler(Dna, k, t, N) 20 times and store the best output in a variable called BestMotifs
275+
#M = GibbsSampler(Dna, k, t, N)
276+
#BestMotifs = M
277+
#for i in range(20):
278+
# if Score(GibbsSampler(Dna, k, t, N)) < Score(BestMotifs):
279+
# BestMotifs = M
280+
## Print the BestMotifs variable
281+
#print(BestMotifs)
282+
## Print Score(BestMotifs)
283+
#print(Score(BestMotifs))
284+
285+
286+
287+
288+
289+
290+
291+
##############################################################################
292+
293+
# Input: A list of strings Dna, and integers k and t
294+
# Output: RandomMotifs(Dna, k, t)
295+
# HINT: You might not actually need to use t since t = len(Dna), but you may find it convenient
296+
def RandomMotifs_Quizz():
    """Return the fixed starting 3-mers given in the quiz (not random)."""
    return ["CCA", "CCT", "CTT", "TTG"]
306+
307+
308+
# Input: A list of strings Dna, followed by positive integers k and t
309+
# Output: RandomizedMotifSearch(Dna, k, t)
310+
def RandomizedMotifSearch_Quizz(Dna, k, t):
    """Print the result of one RandomizedMotifSearch iteration for the quiz.

    Starts from the fixed quiz 3-mers, prints the motifs obtained after one
    profile / most-probable-k-mer step, then the score of those motifs and
    the score of the starting motifs. ``k`` and ``t`` are not used: the motif
    length is fixed at 3 to match the quiz seeds. Returns None.
    """
    start_motifs = RandomMotifs_Quizz()
    next_motifs = Motifs(ProfileWithPseudocounts(start_motifs), 3, Dna)

    print(next_motifs)
    print(Score(next_motifs))
    print(Score(start_motifs))

    return None
324+
325+
326+
import sys
327+
328+
# 3. Assume we are given the following strings Dna:
329+
Dna = ["AAGCCAAA", "AATCCTGG", "GCTACTTG", "ATGTTTTG"]
DNA1, DNA2, DNA3, DNA4 = Dna

# Then, assume that RandomizedMotifSearch begins by randomly choosing the
# following 3-mers Motifs of Dna:
#   CCA
#   CCT
#   CTT
#   TTG

# What are the 3-mers after one iteration of RandomizedMotifSearch?
# In other words, what are the 3-mers Motifs(Profile(Motifs), Dna)?
# Please enter your answer as four space-separated strings.

# k is the motif length; t is the number of strings in Dna (4 here).
k = 3
t = len(Dna)
# RandomizedMotifSearch_Quizz prints its results itself and returns None,
# so this outer print additionally emits "None".
print(RandomizedMotifSearch_Quizz(Dna, k, t))
354+
355+
356+
357+
#Randomized algorithms that are not guaranteed to return exact solutions, but do quickly find approximate solutions, are named after the city of ___.
358+
#Monte Carlo
359+
360+
#Randomized algorithms that always return exact solutions, but are not guaranteed to be fast, are named after the city of ___.
361+
#Las Vegas
362+
363+
#Randomized algorithms that fall in between (usually correct and usually fast) are named after ___.
364+
#Atlantic City
365+
366+
367+
#Given the following code in Python:
368+
#import random
369+
#y=random.randint(1,10)
370+
#if y>=1 and y < 3:
371+
#print("A")
372+
#elif y>=3 and y<=7:
373+
#print("B")
374+
#else: print("C")
375+
#What is the probability (represented as a decimal) that "B" will be printed?
376+
#0.5
377+
378+
379+
#Which of the following motif-finding algorithms is guaranteed to find an optimum solution? In other words, which of the following are not heuristics? (Select all that apply.)
380+
#BruteForce
381+
382+
383+
384+
#Given the following "un-normalized" set of probabilities (i.e., that do not necessarily sum to 1):
385+
#0.22 0.54 0.58 0.36 0.3
386+
#What is the normalized set of probabilities? (Enter your answer as a sequence of space-separated numbers.)
387+
#0.11 0.27 0.29 0.18 0.15

0 commit comments

Comments
 (0)