def BuildProfileMatrix(dnamatrix):
ProfileMatrix = [[1 for x in xrange(len(dnamatrix[0]))] for x in xrange(4)]
indices = {'A':0, 'C':1, 'G': 2, 'T':3}
for seq in dnamatrix:
for i in xrange(len(dnamatrix[0])):
ProfileMatrix[indices[seq[i]]][i] += 1
ProbMatrix = [[float(x)/sum(zip(*ProfileMatrix)[0]) for x in y] for y in ProfileMatrix]
return ProbMatrix
def ProfileRandomGenerator(profile, dna, k, i):
indices = {'A':0, 'C':1, 'G': 2, 'T':3}
score_list = []
for x in xrange(len(dna[i]) - k + 1):
probability = 1
window = dna[i][x : k + x]
for y in xrange(k):
probability *= profile[indices[window[y]]][y]
score_list.append(probability)
rnd = uniform(0, sum(score_list))
current = 0
for z, bias in enumerate(score_list):
current += bias
if rnd <= current:
return dna[i][z : k + z]
def score(motifs):
ProfileMatrix = [[0 for x in xrange(len(motifs[0]))] for x in xrange(4)]
indices = {'A':0, 'C':1, 'G': 2, 'T':3}
for seq in motifs:
for i in xrange(len(motifs[0])):
ProfileMatrix[indices[seq[i]]][i] += 1
score = len(motifs)*len(motifs[0]) - sum([max(x) for x in zip(*ProfileMatrix)])
return score
from random import randint, uniform
def GibbsSampler(k, t, N):
dna = ['CGCCCCTCTCGGGGGTGTTCAGTAACCGGCCA',
'GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG',
'TAGTACCGAGACCGAAAGAAGTATACAGGCGT',
'TAGATCAAGTTTCAGGTGCACGTCGGTGAACC',
'AATCCACCAGCTCCACGTGCAATGTTGGCCTA']
Motifs = []
for i in [randint(0, len(dna[0])-k) for x in range(len(dna))]:
j = 0
kmer = dna[j][i : k+i]
j += 1
Motifs.append(kmer)
BestMotifs = []
s_best = float('inf')
for i in xrange(N):
x = randint(0, t-1)
Motifs.pop(x)
profile = BuildProfileMatrix(Motifs)
Motif = ProfileRandomGenerator(profile, dna, k, x)
Motifs.insert(x, Motif)
s_motifs = score(Motifs)
if s_motifs < s_best:
s_best = s_motifs
BestMotifs = Motifs
return [s_best, BestMotifs]
k, t, N =8, 5, 100
best_motifs = [float('inf'), None]
# Repeat the Gibbs sampler search 20 times.
for repeat in xrange(20):
current_motifs = GibbsSampler(k, t, N)
if current_motifs[0] < best_motifs[0]:
best_motifs = current_motifs
# Print and save the answer.
print ('\n'.join(best_motifs[1]))
ZGVmIEJ1aWxkUHJvZmlsZU1hdHJpeChkbmFtYXRyaXgpOgogICAgUHJvZmlsZU1hdHJpeCA9IFtbMSBmb3IgeCBpbiB4cmFuZ2UobGVuKGRuYW1hdHJpeFswXSkpXSBmb3IgeCBpbiB4cmFuZ2UoNCldCiAgICBpbmRpY2VzID0geydBJzowLCAnQyc6MSwgJ0cnOiAyLCAnVCc6M30KICAgIGZvciBzZXEgaW4gZG5hbWF0cml4OgogICAgICAgIGZvciBpIGluIHhyYW5nZShsZW4oZG5hbWF0cml4WzBdKSk6ICAgICAgICAgICAgCiAgICAgICAgICAgIFByb2ZpbGVNYXRyaXhbaW5kaWNlc1tzZXFbaV1dXVtpXSArPSAxCiAgICBQcm9iTWF0cml4ID0gW1tmbG9hdCh4KS9zdW0oemlwKCpQcm9maWxlTWF0cml4KVswXSkgZm9yIHggaW4geV0gZm9yIHkgaW4gUHJvZmlsZU1hdHJpeF0KICAgIHJldHVybiBQcm9iTWF0cml4CgpkZWYgUHJvZmlsZVJhbmRvbUdlbmVyYXRvcihwcm9maWxlLCBkbmEsIGssIGkpOgogICAgaW5kaWNlcyA9IHsnQSc6MCwgJ0MnOjEsICdHJzogMiwgJ1QnOjN9CiAgICBzY29yZV9saXN0ID0gW10KICAgIGZvciB4IGluIHhyYW5nZShsZW4oZG5hW2ldKSAtIGsgKyAxKToKICAgICAgICBwcm9iYWJpbGl0eSA9IDEKICAgICAgICB3aW5kb3cgPSBkbmFbaV1beCA6IGsgKyB4XQoKICAgIGZvciB5IGluIHhyYW5nZShrKToKICAgICAgICBwcm9iYWJpbGl0eSAqPSBwcm9maWxlW2luZGljZXNbd2luZG93W3ldXV1beV0KCiAgICBzY29yZV9saXN0LmFwcGVuZChwcm9iYWJpbGl0eSkKCiAgICBybmQgPSB1bmlmb3JtKDAsIHN1bShzY29yZV9saXN0KSkKCiAgICBjdXJyZW50ID0gMAoKICAgIGZvciB6LCBiaWFzIGluIGVudW1lcmF0ZShzY29yZV9saXN0KToKICAgICAgICBjdXJyZW50ICs9IGJpYXMKICAgICAgICBpZiBybmQgPD0gY3VycmVudDoKICAgICAgICAgICAgcmV0dXJuIGRuYVtpXVt6IDogayArIHpdCgpkZWYgc2NvcmUobW90aWZzKToKICAgIFByb2ZpbGVNYXRyaXggPSBbWzAgZm9yIHggaW4geHJhbmdlKGxlbihtb3RpZnNbMF0pKV0gZm9yIHggaW4geHJhbmdlKDQpXQogICAgCiAgICBpbmRpY2VzID0geydBJzowLCAnQyc6MSwgJ0cnOiAyLCAnVCc6M30KICAgIAogICAgZm9yIHNlcSBpbiBtb3RpZnM6CiAgICAgICAgZm9yIGkgaW4geHJhbmdlKGxlbihtb3RpZnNbMF0pKTogICAgICAgICAgICAKICAgICAgICAgICAgUHJvZmlsZU1hdHJpeFtpbmRpY2VzW3NlcVtpXV1dW2ldICs9IDEKICAgIHNjb3JlID0gbGVuKG1vdGlmcykqbGVuKG1vdGlmc1swXSkgLSBzdW0oW21heCh4KSBmb3IgeCBpbiB6aXAoKlByb2ZpbGVNYXRyaXgpXSkKICAgIHJldHVybiBzY29yZQoKZnJvbSByYW5kb20gaW1wb3J0IHJhbmRpbnQsIHVuaWZvcm0gICAKCmRlZiBHaWJic1NhbXBsZXIoaywgdCwgTik6CiAgICBkbmEgPSBbJ0NHQ0NDQ1RDVENHR0dHR1RHVFRDQUdUQUFDQ0dHQ0NBJywKICAgICdHR0dDR0FHR1RBVEdUR1RBQUdUR0NDQUFHR1RHQ0NBRycsCiAgICAnVEFHVEFDQ0dBR0FDQ0dBQUFHQUFHVEFUQUNBR0dDR1QnLAogICAgJ1RBR0FUQ0FBR1RUVENBR0dUR0NBQ0dUQ0dHVEdBQUNDJywKICAgICdBQVRDQ0FDQ0FHQ1RDQ0FDR1RHQ0FBVEdUVEdHQ0NUQSddCiAgICBNb3RpZnMgPSBbXQogICAgZm9yIGkgaW4gW3JhbmRpbnQoMCwgbGVuKGRuYVswXSktaykgZm9yIHggaW4gcmFuZ2UobGVuKGRuYSkpXToKICAgICAgICBqID0gMAogICAgICAgIGttZXIgPSBkbmFbal1baSA6IGsraV0KICAgICAgICBqICs9IDEKICAgICAgICBNb3RpZnMuYXBwZW5kKGttZXIpCiAgICBCZXN0TW90aWZzID0gW10KICAgIHNfYmVzdCA9IGZsb2F0KCdpbmYnKQogICAgZm9yIGkgaW4geHJhbmdlKE4pOgogICAgICAgIHggPSByYW5kaW50KDAsIHQtMSkKICAgICAgICBNb3RpZnMucG9wKHgpCiAgICAgICAgcHJvZmlsZSA9IEJ1aWxkUHJvZmlsZU1hdHJpeChNb3RpZnMpCiAgICAgICAgTW90aWYgPSBQcm9maWxlUmFuZG9tR2VuZXJhdG9yKHByb2ZpbGUsIGRuYSwgaywgeCkKICAgICAgICBNb3RpZnMuaW5zZXJ0KHgsIE1vdGlmKQogICAgICAgIHNfbW90aWZzID0gc2NvcmUoTW90aWZzKQogICAgICAgIGlmIHNfbW90aWZzIDwgc19iZXN0OgogICAgICAgICAgICBzX2Jlc3QgPSBzX21vdGlmcwogICAgICAgICAgICBCZXN0TW90aWZzID0gTW90aWZzCiAgICByZXR1cm4gW3NfYmVzdCwgQmVzdE1vdGlmc10KCmssIHQsIE4gPTgsIDUsIDEwMCAgICAgICAgICAgIApiZXN0X21vdGlmcyA9IFtmbG9hdCgnaW5mJyksIE5vbmVdCgojIFJlcGVhdCB0aGUgR2liYnMgc2FtcGxlciBzZWFyY2ggMjAgdGltZXMuCmZvciByZXBlYXQgaW4geHJhbmdlKDIwKToKICAgIGN1cnJlbnRfbW90aWZzID0gR2liYnNTYW1wbGVyKGssIHQsIE4pCiAgICBpZiBjdXJyZW50X21vdGlmc1swXSA8IGJlc3RfbW90aWZzWzBdOgogICAgICAgIGJlc3RfbW90aWZzID0gY3VycmVudF9tb3RpZnMKIyBQcmludCBhbmQgc2F2ZSB0aGUgYW5zd2VyLgpwcmludCAoJ1xuJy5qb2luKGJlc3RfbW90aWZzWzFdKSk=