fork(27) download
  1. def BuildProfileMatrix(dnamatrix):
  2. ProfileMatrix = [[1 for x in xrange(len(dnamatrix[0]))] for x in xrange(4)]
  3. indices = {'A':0, 'C':1, 'G': 2, 'T':3}
  4. for seq in dnamatrix:
  5. for i in xrange(len(dnamatrix[0])):
  6. ProfileMatrix[indices[seq[i]]][i] += 1
  7. ProbMatrix = [[float(x)/sum(zip(*ProfileMatrix)[0]) for x in y] for y in ProfileMatrix]
  8. return ProbMatrix
  9.  
  10. def ProfileRandomGenerator(profile, dna, k, i):
  11. indices = {'A':0, 'C':1, 'G': 2, 'T':3}
  12. score_list = []
  13. for x in xrange(len(dna[i]) - k + 1):
  14. probability = 1
  15. window = dna[i][x : k + x]
  16.  
  17. for y in xrange(k):
  18. probability *= profile[indices[window[y]]][y]
  19.  
  20. score_list.append(probability)
  21.  
  22. rnd = uniform(0, sum(score_list))
  23.  
  24. current = 0
  25.  
  26. for z, bias in enumerate(score_list):
  27. current += bias
  28. if rnd <= current:
  29. return dna[i][z : k + z]
  30.  
  31. def score(motifs):
  32. ProfileMatrix = [[0 for x in xrange(len(motifs[0]))] for x in xrange(4)]
  33.  
  34. indices = {'A':0, 'C':1, 'G': 2, 'T':3}
  35.  
  36. for seq in motifs:
  37. for i in xrange(len(motifs[0])):
  38. ProfileMatrix[indices[seq[i]]][i] += 1
  39. score = len(motifs)*len(motifs[0]) - sum([max(x) for x in zip(*ProfileMatrix)])
  40. return score
  41.  
  42. from random import randint, uniform
  43.  
  44. def GibbsSampler(k, t, N):
  45. dna = ['CGCCCCTCTCGGGGGTGTTCAGTAACCGGCCA',
  46. 'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG',
  47. 'TAGTACCGAGACCGAAAGAAGTATACAGGCGT',
  48. 'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC',
  49. 'AATCCACCAGCTCCACGTGCAATGTTGGCCTA']
  50. Motifs = []
  51. for i in [randint(0, len(dna[0])-k) for x in range(len(dna))]:
  52. j = 0
  53. kmer = dna[j][i : k+i]
  54. j += 1
  55. Motifs.append(kmer)
  56. BestMotifs = []
  57. s_best = float('inf')
  58. for i in xrange(N):
  59. x = randint(0, t-1)
  60. Motifs.pop(x)
  61. profile = BuildProfileMatrix(Motifs)
  62. Motif = ProfileRandomGenerator(profile, dna, k, x)
  63. Motifs.insert(x, Motif)
  64. s_motifs = score(Motifs)
  65. if s_motifs < s_best:
  66. s_best = s_motifs
  67. BestMotifs = Motifs
  68. return [s_best, BestMotifs]
  69.  
  70. k, t, N =8, 5, 100
  71. best_motifs = [float('inf'), None]
  72.  
  73. # Repeat the Gibbs sampler search 20 times.
  74. for repeat in xrange(20):
  75. current_motifs = GibbsSampler(k, t, N)
  76. if current_motifs[0] < best_motifs[0]:
  77. best_motifs = current_motifs
  78. # Print and save the answer.
  79. print ('\n'.join(best_motifs[1]))
Success #stdin #stdout 0.12s 11504KB
stdin
Standard input is empty
stdout
CGCCCCTC
GGGCGAGG
TAGTACCG
TAGATCAA
AATCCACC