1
+ """
2
+ @author: salimt
3
+ """
4
+
5
+ import random
6
+
7
+ """
8
+ The functions in Motif.py will return 0 for an entire motif probability even if only
9
+ one of the positions has a 0 probability of existing in the consensus string.
10
+
11
+ This doesn't make sense because a motif that differs from the consensus string
12
+ at every position will also get a total probability of 0.
13
+
14
+ In order to improve this unfair scoring, bioinformaticians often substitute zeroes
15
+ with small numbers called pseudocounts.
16
+ """
17
+ # Input: String Text, an integer k, and profile matrix Profile
18
+ # Output: String of most probable pattern
19
def ProfileMostProbableKmer(text, k, profile):
    """Return the k-mer of ``text`` that is most probable under ``profile``.

    Each window of length ``k`` is scored with ``Pr``. Only a strictly larger
    probability replaces the current best, so the leftmost window wins ties.
    """
    best_kmer = text[:k]
    best_prob = Pr(best_kmer, profile)
    start = 1
    last_start = len(text) - k
    while start <= last_start:
        window = text[start:start + k]
        window_prob = Pr(window, profile)
        if window_prob > best_prob:
            best_prob, best_kmer = window_prob, window
        start += 1
    return best_kmer
28
+
29
+ # Input: A set of kmers Motifs
30
+ # Output: CountWithPseudocounts(Motifs)
31
def CountWithPseudocounts(Motifs):
    """Count the symbols of ``Motifs`` column by column, adding one pseudocount.

    Returns a dictionary with the keys "A", "C", "G" and "T"; each value is a
    list with one entry per column holding the number of motifs that carry
    that symbol in the column, plus 1.
    """
    width = len(Motifs[0])
    # Starting every cell at 1 is the same as counting first and adding the
    # pseudocount afterwards.
    counts = {symbol: [1] * width for symbol in "ACGT"}
    for column in range(width):
        for motif in Motifs:
            counts.get(motif[column])[column] += 1
    return counts
40
+
41
+ """
42
+ ProfileWithPseudocounts(Motifs) that takes a list of strings Motifs as input and
43
+ returns the profile matrix of Motifs with pseudocounts as a dictionary of lists
44
+ """
45
+
46
+ # Input: A set of kmers Motifs
47
+ # Output: ProfileWithPseudocounts(Motifs)
48
def ProfileWithPseudocounts(Motifs):
    """Build the profile matrix of ``Motifs`` from pseudocounted counts.

    Every entry of ``CountWithPseudocounts(Motifs)`` is divided by
    ``len(Motifs) + 4`` (one pseudocount per symbol of the alphabet). The
    result is a dictionary mapping each symbol to a list of per-column
    frequencies.
    """
    denominator = len(Motifs) + 4
    counts = CountWithPseudocounts(Motifs)
    return {
        symbol: [count / denominator for count in column_counts]
        for symbol, column_counts in counts.items()
    }
56
+
57
+ # motif1 = "AACGTA"
58
+ # motif2 = "CCCGTT"
59
+ # motif3 = "CACCTT"
60
+ # motif4 = "GGATTA"
61
+ # motif5 = "TTCCGG"
62
+ # motifs = [motif1, motif2, motif3, motif4, motif5]
63
+ #
64
+ # print(ProfileWithPseudocounts(motifs))
65
+
66
+ """
67
+ Write a function GreedyMotifSearchWithPseudocounts(Dna, k, t) that takes a list
68
+ of strings Dna followed by integers k and t and returns the result of running
69
+ GreedyMotifSearch, where each profile matrix is generated with pseudocounts
70
+ """
71
+ # Input: A list of kmers Dna, and integers k and t (where t is the number of kmers in Dna)
72
+ # Output: GreedyMotifSearch(Dna, k, t)
73
def GreedyMotifSearchWithPseudocounts(Dna, k, t):
    """Greedy motif search in which every profile is built with pseudocounts.

    For each k-mer of the first string, the remaining ``t - 1`` motifs are
    chosen one string at a time as the profile-most-probable k-mer given the
    motifs selected so far. The collection with the lowest ``Score`` is
    returned; the initial candidate is the first k-mer of every string.
    """
    best = [Dna[row][:k] for row in range(t)]
    first = Dna[0]
    for start in range(len(first) - k + 1):
        candidate = [first[start:start + k]]
        for row in range(1, t):
            profile = ProfileWithPseudocounts(candidate[:row])
            candidate.append(ProfileMostProbableKmer(Dna[row], k, profile))
        if Score(candidate) < Score(best):
            best = candidate
    return best
84
+
85
+ # Input: A set of kmers Motifs
86
+ # Output: A consensus string of Motifs.
87
def Consensus(Motifs):
    """Return the consensus string of ``Motifs``.

    For every column the symbol with the largest pseudocounted count is
    taken; when several symbols tie, the first one in the count dictionary's
    order is kept.
    """
    counts = CountWithPseudocounts(Motifs)
    width = len(Motifs[0])
    # max() returns the first maximal key, matching a strict ">" scan.
    return "".join(
        max(counts, key=lambda symbol: counts[symbol][column])
        for column in range(width)
    )
100
+
101
+ # Input: A set of k-mers Motifs
102
+ # Output: The score of these k-mers.
103
def Score(Motifs):
    """Return the number of symbols in ``Motifs`` that disagree with the consensus.

    For every column, the motifs that do not carry the column's most common
    symbol are counted, and these counts are summed over all columns.
    Pseudocounts are deliberately not involved: subtracting pseudocounted
    counts would make the result too small by one per column (identical
    motifs would score ``-k`` instead of ``0``).

    Motifs: list of equal-length strings.
    Returns: an int >= 0; 0 for an empty list.
    """
    if not Motifs:
        return 0
    total = 0
    for column in range(len(Motifs[0])):
        symbols = [motif[column] for motif in Motifs]
        most_common = max(symbols.count(symbol) for symbol in set(symbols))
        total += len(symbols) - most_common
    return total
108
+
109
+ # Input: String Text and profile matrix Profile
110
+ # Output: Probability value
111
def Pr(Text, Profile):
    """Return the probability of ``Text`` under the profile matrix ``Profile``.

    The result is the product, over every position of ``Text``, of the
    profile entry for the symbol found at that position. An empty ``Text``
    yields 1.
    """
    probability = 1
    for position, symbol in enumerate(Text):
        probability *= Profile.get(symbol)[position]
    return probability
117
+
118
+ # k = 3
119
+ # t = 5
120
+ # Dna = ["GGCGTTCAGGCA", "AAGAATCAGTCA", "CAAGGAGTTCGC", "CACGTCAATCAC", "CAATAATATTCG"]
121
+ # print(GreedyMotifSearchWithPseudocounts(Dna, k, t))
122
+
123
+ # Input: A profile matrix Profile and a list of strings Dna
124
+ # Output: Profile-most probable k-mer from each row of Dna
125
def Motifs(Profile, k, Dna):
    """Return the profile-most-probable k-mer of every string in ``Dna``.

    The result has one k-mer per input string, in the same order.
    """
    chosen = []
    for text in Dna:
        chosen.append(ProfileMostProbableKmer(text, k, Profile))
    return chosen
127
+
128
+ # Profile = {'A': [0.8, 0.0, 0.0, 0.2],
129
+ # 'C': [0.0, 0.6, 0.2, 0.0],
130
+ # 'G': [0.2, 0.2, 0.8, 0.0],
131
+ # 'T': [0.0, 0.2, 0.0, 0.8]}
132
+ #
133
+ # Dnas = ["TTACCTTAAC", "GATGTCTGTC", "ACGGCGTTAG", "CCCTAACGAG", "CGTCAGAGGT"]
134
+ #
135
+ # print(Motifs(Profile, 4, Dnas))
136
+
137
+ # Input: A list of strings Dna, and integers k and t
138
+ # Output: RandomMotifs(Dna, k, t)
139
+ # HINT: You might not actually need to use t since t = len(Dna), but you may find it convenient
140
def RandomMotifs(Dna, k, t):
    """Pick one uniformly random k-mer from every string in ``Dna``.

    Dna: list of strings, each at least ``k`` characters long.
    k:   length of the k-mers to extract.
    t:   number of strings; unused because it equals ``len(Dna)``, but kept
         so existing callers do not change.

    Returns a list with one k-mer per string, in the order of ``Dna``.
    Raises ValueError when a string is shorter than ``k``.
    """
    randMotifs = []
    for dna in Dna:
        last_start = len(dna) - k
        if last_start < 0:
            raise ValueError("every string in Dna must be at least k characters long")
        # Valid start positions are 0 .. len(dna) - k inclusive; the range
        # depends on the string length, not on the number of strings.
        start = random.randint(0, last_start)
        randMotifs.append(dna[start:start + k])
    return randMotifs
146
+ #
147
+ # Dnas = ["TTACCTTAAC", "GATGTCTGTC", "ACGGCGTTAG", "CCCTAACGAG", "CGTCAGAGGT"]
148
+ # k = 3
149
+ # t = len(Dnas)
150
+ # print(RandomMotifs(Dnas, k, t))
151
+
152
+ # Input: Positive integers k and t, followed by a list of strings Dna
153
+ # Output: return a list of random kmer motifs
154
def RandomizedMotifSearch(Dna, k, t):
    """Iteratively refine a random set of motifs until the score stops improving.

    Starting from ``RandomMotifs(Dna, k, t)``, a pseudocounted profile is
    built from the current motifs and replaced by the profile-most-probable
    k-mers of ``Dna``. The loop ends the first time the new motifs do not
    score strictly better than the best ones seen, and those best motifs are
    returned.
    """
    best = RandomMotifs(Dna, k, t)
    current = best
    improved = True
    while improved:
        current = Motifs(ProfileWithPseudocounts(current), k, Dna)
        improved = Score(current) < Score(best)
        if improved:
            best = current
    return best
164
+
165
+ #Dna = ["GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC",
166
+ #"CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG",
167
+ #"ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC",
168
+ #"GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC",
169
+ #"GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG",
170
+ #"CCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA",
171
+ #"GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA",
172
+ #"GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG",
173
+ #"GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG",
174
+ #"TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC"]
175
+ #
176
+ ## set t equal to the number of strings in Dna, k equal to 15, and N equal to 100.
177
+ #t=len(Dna)
178
+ #k=15
179
+ #N=100
180
+ ## Call RandomizedMotifSearch(Dna, k, t) N times, storing the best-scoring set of motifs
181
+ ## resulting from this algorithm in a variable called BestMotifs
182
+ #M = RandomizedMotifSearch(Dna, k, t)
183
+ #BestMotifs = M
184
+ #for i in range(N):
185
+ # if Score(M) < Score(BestMotifs):
186
+ # BestMotifs = M
187
+ ## Print the BestMotifs variable
188
+ #print(BestMotifs)
189
+ ## Print Score(BestMotifs)
190
+ #print(Score(BestMotifs))
191
+
192
+ """
193
+ The function should divide each value in Probabilities by the sum of all values
194
+ in Probabilities, then return the resulting dictionary
195
+ """
196
+
197
+ # Input: A dictionary Probabilities, where keys are k-mers and values are the
198
+ # probabilities of these k-mers (which do not necessarily sum up to 1)
199
+ # Output: A normalized dictionary where the probability of each k-mer was
200
+ # divided by the sum of all k-mers' probabilities
201
def Normalize(Probabilities):
    """Rescale the values of ``Probabilities`` so that they sum to 1.

    Returns a new dictionary with the same keys, each value divided by the
    sum of all values; the input dictionary is left untouched.
    """
    total = sum(Probabilities.values())
    normalized = {}
    for kmer, weight in Probabilities.items():
        normalized[kmer] = weight / total
    return normalized
205
+
206
+
207
+ # Probabilities = {'A': 0.15, 'B': 0.6, 'C': 0.225, 'D': 0.225, 'E': 0.3}
208
+ # print(Normalize(Probabilities))
209
+
210
+ """
211
+ This function takes a dictionary Probabilities whose keys are k-mers and whose
212
+ values are the probabilities of these k-mers. The function should return a
213
+ randomly chosen k-mer key with respect to the values in Probabilities
214
+ """
215
+
216
+ # Input: A dictionary Probabilities whose keys are k-mers and whose values are the probabilities of these kmers
217
+ # Output: A randomly chosen k-mer with respect to the values in Probabilities
218
def WeightedDie(Probabilities):
    """Return a random key of ``Probabilities``, chosen proportionally to its value.

    Probabilities: dictionary mapping k-mers to non-negative weights. The
    weights do not have to sum to exactly 1: the random draw is taken from
    ``[0, sum of weights]``, so rounding error or unnormalized input no
    longer makes the function fall off the end of the loop.

    Returns the selected key, or None when the dictionary is empty or holds
    no positive weight.
    """
    total = sum(Probabilities.values())
    threshold = random.uniform(0, total)
    cumulative = 0
    fallback = None
    for kmer, weight in Probabilities.items():
        if weight <= 0:
            # Keys without weight can never be selected.
            continue
        cumulative += weight
        fallback = kmer
        if threshold < cumulative:
            return kmer
    # Only reached when floating-point rounding leaves the threshold at or
    # above the accumulated total; the last weighted key owns that interval.
    return fallback
224
+
225
+ # Probabilities = {'AA': 0.2, 'AT': 0.4, 'CC': 0.1, 'GG': 0.1, 'TT': 0.2}
226
+ # print(WeightedDie(Probabilities))
227
+
228
+ """
229
+ Now that we can simulate a weighted die roll over a collection of probabilities
230
+ of strings, we need to make this function into a subroutine of a larger function
231
+ that randomly chooses a k-mer from a string Text based on a profile matrix profile
232
+ """
233
+
234
+ # Input: A string Text, a profile matrix Profile, and an integer k
235
+ # Output: ProfileGeneratedString(Text, profile, k)
236
def ProfileGeneratedString(Text, profile, k):
    """Randomly choose a k-mer of ``Text``, weighted by its probability under ``profile``.

    Every window of length ``k`` contributes ``Pr(window, profile)`` to the
    weight of its k-mer. Contributions are accumulated, so a k-mer that
    occurs at several positions is weighted once per occurrence instead of
    having earlier occurrences overwritten. The weights are normalized and
    one k-mer is drawn with ``WeightedDie``.
    """
    probabilities = {}
    for start in range(len(Text) - k + 1):
        kmer = Text[start:start + k]
        probabilities[kmer] = probabilities.get(kmer, 0) + Pr(kmer, profile)
    return WeightedDie(Normalize(probabilities))
243
+
244
+ """
245
+ RandomizedMotifSearch may change all t strings in Motifs in a single iteration.
246
+ This strategy may prove reckless, since some correct motifs (captured in Motifs)
247
+ may potentially be discarded at the next iteration.
248
+
249
+ GibbsSampler is a more cautious iterative algorithm that discards a single k-mer
250
+ from the current set of motifs at each iteration and decides to either keep it
251
+ or replace it with a new one.
252
+ """
253
+
254
def GibbsSampler(Dna, k, t, N):
    """Search for motifs by resampling one k-mer per iteration.

    Dna: list of ``t`` strings.
    k:   motif length.
    t:   number of strings in ``Dna``.
    N:   number of resampling iterations.

    Starting from ``RandomMotifs(Dna, k, t)``, each iteration picks one
    string at random, builds a pseudocounted profile from the motifs of all
    the other strings, and replaces the picked string's motif with a k-mer
    drawn by ``ProfileGeneratedString``. The lowest-scoring collection seen
    is returned; the loop always runs all ``N`` iterations.
    """
    current = RandomMotifs(Dna, k, t)
    best = list(current)
    for _ in range(N):
        row = random.randint(0, t - 1)
        others = current[:row] + current[row + 1:]
        # With a single string nothing is left after the removal, so fall
        # back to the current motifs to keep the profile well defined.
        profile = ProfileWithPseudocounts(others or current)
        current[row] = ProfileGeneratedString(Dna[row], profile, k)
        if Score(current) < Score(best):
            # Copy, because ``current`` keeps being modified in place.
            best = list(current)
    return best
265
+
266
+ #Dna =["GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC", "CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG", "ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC", "GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC", "GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG", "CCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA", "GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA", 
"GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG", "GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG", "TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC"]
267
+ #
268
+ ## set t equal to the number of strings in Dna, k equal to 15, and N equal to 100
269
+ #t = len(Dna)
270
+ #k = 15
271
+ #N = 100
272
+ #
273
+ #
274
+ ## Call GibbsSampler(Dna, k, t, N) 20 times and store the best output in a variable called BestMotifs
275
+ #M = GibbsSampler(Dna, k, t, N)
276
+ #BestMotifs = M
277
+ #for i in range(20):
278
+ # if Score(GibbsSampler(Dna, k, t, N)) < Score(BestMotifs):
279
+ # BestMotifs = M
280
+ ## Print the BestMotifs variable
281
+ #print(BestMotifs)
282
+ ## Print Score(BestMotifs)
283
+ #print(Score(BestMotifs))
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+ ##############################################################################
292
+
293
+ # Input: none (the starting motifs of the quiz are hard-coded)
294
+ # Output: the fixed list of 3-mers CCA, CCT, CTT, TTG used as the quiz's "random" choice
295
+ # HINT: You might not actually need to use t since t = len(Dna), but you may find it convenient
296
def RandomMotifs_Quizz():
    """Return the fixed 3-mers that the quiz assumes were chosen at random.

    A new list is built on every call.
    """
    return ["CCA", "CCT", "CTT", "TTG"]
306
+
307
+
308
+ # Input: Positive integers k and t, followed by a list of strings Dna
309
+ # Output: RandomizedMotifSearch(Dna, k, t)
310
def RandomizedMotifSearch_Quizz(Dna, k, t):
    """Print the result of one RandomizedMotifSearch iteration for the quiz.

    Dna: list of strings to search.
    k:   motif length; now passed on to ``Motifs`` instead of a literal 3.
    t:   number of strings; unused, kept for signature compatibility.

    Starting from ``RandomMotifs_Quizz()``, the function prints the motifs
    obtained after one profile/motifs update, then the score of those motifs,
    then the score of the starting motifs. Returns None.
    """
    start = RandomMotifs_Quizz()
    profile = ProfileWithPseudocounts(start)
    updated = Motifs(profile, k, Dna)
    print(updated)
    print(Score(updated))
    print(Score(start))
    return None
324
+
325
+
326
+ import sys
327
+
328
+ # 3. Assume we are given the following strings Dna:
329
# Input strings of the quiz.
DNA1 = "AAGCCAAA"
DNA2 = "AATCCTGG"
DNA3 = "GCTACTTG"
DNA4 = "ATGTTTTG"

Dna = [DNA1, DNA2, DNA3, DNA4]

# Then, assume that RandomizedMotifSearch begins by randomly choosing the
# following 3-mers Motifs of Dna:
#     CCA
#     CCT
#     CTT
#     TTG

# What are the 3-mers after one iteration of RandomizedMotifSearch?
# In other words, what are the 3-mers Motifs(Profile(Motifs), Dna)?
# Please enter your answer as four space-separated strings.

# set t equal to the number of strings in Dna and k equal to 3
k = 3
t = len(Dna)

# Run the quiz only when the file is executed as a script, so importing the
# module has no side effects. The function prints its own results and returns
# None, so its return value is not printed.
if __name__ == "__main__":
    RandomizedMotifSearch_Quizz(Dna, k, t)
354
+
355
+
356
+
357
+ #Randomized algorithms that are not guaranteed to return exact solutions, but do quickly find approximate solutions, are named after the city of ___.
358
+ #Monte Carlo
359
+
360
+ #Randomized algorithms that are guaranteed to return exact solutions, but are not guaranteed to be fast, are named after the city of ___.
361
+ #Las Vegas
362
+
363
+ #Randomized algorithms that sit in between (probably correct and probably fast) are named after ___.
364
+ #Atlantic City
365
+
366
+
367
+ #Given the following code in Python:
368
+ #import random
369
+ #y=random.randint(1,10)
370
+ #if y>=1 and y < 3:
371
+ #print("A")
372
+ #elif y>=3 and y<=7:
373
+ #print("B")
374
+ #else: print("C")
375
+ #What is the probability (represented as a decimal) that "B" will be printed?
376
+ #0.5
377
+
378
+
379
+ #Which of the following motif-finding algorithms is guaranteed to find an optimum solution? In other words, which of the following are not heuristics? (Select all that apply.)
380
+ #BruteForce
381
+
382
+
383
+
384
+ #Given the following "un-normalized" set of probabilities (i.e., that do not necessarily sum to 1):
385
+ #0.22 0.54 0.58 0.36 0.3
386
+ #What is the normalized set of probabilities? (Enter your answer as a sequence of space-separated numbers.)
387
+ #0.11 0.27 0.29 0.18 0.15
0 commit comments