# http://stackoverflow.com/q/33181434/5290909
# Capture every character repeated N times
import re
# Sample input and the inclusive range of repeat counts to try
text = 'abcdbcdcdd'
minN = 2
maxN = 5

# init
# Bump maxN by one so it can be used directly as an exclusive range() bound
maxN = maxN + 1
# One independent row per index 0..maxN-1, each holding a '-' placeholder
# for every character position of text
result = [['-'] * len(text) for _ in range(maxN)]
def RepeatedCharsPattern(min_repeat):
    """Return the regex source used to find characters that occur again later.

    The pattern captures one word character in group 1 and then requires one
    of two lookaheads to succeed:

    * group 1 occurs at least ``min_repeat`` more times further on, or
    * group 1 occurs exactly ``min_repeat - 1`` more times further on; each of
      those occurrences is captured in its own group (2, 3, ...) and a
      negative lookahead forbids any occurrence after the last one.

    Args:
        min_repeat: integer repeat count embedded in the pattern.

    Returns:
        The pattern as a string (not compiled).
    """
    # Group 1 plus the "still min_repeat occurrences ahead" alternative,
    # followed by the opening of the second lookahead
    prefix = r'(\w)(?:(?=(?:.*?\1){' + str(min_repeat) + r'})|(?='
    # Skip anything that is not group 1, then capture the next occurrence
    capture_next = r'(?:(?!\1).)*(\1)'
    # No further occurrence allowed; closes the lookahead and the alternation
    suffix = r'(?!.*?\1)))'
    return prefix + capture_next * (min_repeat - 1) + suffix
# Example for N = minN to maxN (2 to 5)
for N in range(minN, maxN):
    pattern = RepeatedCharsPattern(N)
    regex = re.compile(pattern)

    print("Pattern: r'%s'\nN = %s\tText: '%s'" % (pattern, N, text))

    # Number the matches from 1; the counter stays at 0 when nothing matched
    match_count = 0
    for match_count, match in enumerate(regex.finditer(text), 1):
        # Visit every capture group, skipping the ones that did not participate
        for group_no, char in enumerate(match.groups(), 1):
            if char is None:
                continue
            start, end = match.span(group_no)
            print('\tMatch #%s group(%s)\t[%s:%s]:\t"%s"' % (match_count, group_no, start, end, char))
            # Store the char at its position in text so row N keeps the original order
            result[N][start] = char
    if not match_count:
        print("\tNo chars repeated %s times!" % N)

# Print all characters matched in the original order
summary_rows = ['(%s)\t%s' % (n, ''.join(result[n])) for n in range(minN, maxN)]
print("RESULT FOR ALL N's:\n\t%s" % '\n\t'.join(summary_rows))
IyBodHRwOi8vc3RhY2tvdmVyZmxvdy5jb20vcS8zMzE4MTQzNC81MjkwOTA5CiMgQ2FwdHVyZSBldmVyeSBjaGFyYWN0ZXIgcmVwZWF0ZWQgTiB0aW1lcwppbXBvcnQgcmUKCnRleHQgPSAnYWJjZGJjZGNkZCcKbWluTiA9IDIKbWF4TiA9IDUKCiMgaW5pdAptYXhOICs9IDEgI21heE4gYXMgdXBwZXIgYm91bmQKcmVzdWx0ID0gW1snLScgZm9yIGkgaW4gcmFuZ2UobGVuKHRleHQpKV0gZm9yIGkgaW4gcmFuZ2UobWF4TildICNpbml0aWFsaXplIHJlc3VsdCBsaXN0CgoKZGVmIFJlcGVhdGVkQ2hhcnNQYXR0ZXJuKG1pbl9yZXBlYXQpOgoJIyBHZW5lcmF0ZSBwYXR0ZXJuCglwYXR0ID0gcicoXHcpKD86KD89KD86Lio/XDEpeycgKyBzdHIobWluX3JlcGVhdCkgKyByJ30pfCg/PScKCWZvciBpIGluIHJhbmdlKCAxLCBtaW5fcmVwZWF0KToKCQlwYXR0ICs9IHInKD86KD8hXDEpLikqKFwxKScKCXBhdHQgKz0gcicoPyEuKj9cMSkpKScKCXJldHVybiBwYXR0CgojIEV4YW1wbGUgZm9yIE4gPSBtaW5OIHRvIG1heE4gKDIgdG8gNSkKZm9yIE4gaW4gcmFuZ2UobWluTixtYXhOKToKCXBhdHRlcm4gPSBSZXBlYXRlZENoYXJzUGF0dGVybihOKQoJcmVnZXggPSByZS5jb21waWxlKHBhdHRlcm4pCgkKCXByaW50KCJQYXR0ZXJuOiByJyVzJ1xuTiA9ICVzXHRUZXh0OiAnJXMnIiAlKHBhdHRlcm4sIE4sIHRleHQpKQoJCgkjIExvb3AgbWF0Y2hlcwoJbSA9IDAKCWZvciBtYXRjaCBpbiByZWdleC5maW5kaXRlcih0ZXh0KToKCQltICs9IDEKCQkjIExvb3AgZ3JvdXBzCgkJZm9yIGMgaW4gcmFuZ2UoMSxyZWdleC5ncm91cHMgKyAxKToKCQkJY2hhciA9IG1hdGNoLmdyb3VwKGMpCgkJCWlmIGNoYXIgaXMgbm90IE5vbmU6CgkJCQlpbmRleCA9IG1hdGNoLnN0YXJ0KGMpCgkJCQlwcmludCggJ1x0TWF0Y2ggIyVzIGdyb3VwKCVzKVx0WyVzOiVzXTpcdCIlcyInICUobSwgYywgaW5kZXgsIG1hdGNoLmVuZChjKSwgY2hhcikpCgkJCQkjIGFkZCB0aGUgY2hhciB0byBhIGxpc3RbTl1baW5kZXhdIHRvIGtlZXAgb3JkZXJlZCBtYXRjaGVzCgkJCQlyZXN1bHRbTl1baW5kZXhdID0gY2hhcgoJaWYgbSA9PSAwOgoJCXByaW50KCJcdE5vIGNoYXJzIHJlcGVhdGVkICVzIHRpbWVzISIgJSBOKQoKIyBQcmludCBhbGwgY2hhcmFjdGVycyBtYXRjaGVkIGluIHRoZSBvcmlnaW5hbCBvcmRlcgpwcmludCgiUkVTVUxUIEZPUiBBTEwgTidzOlxuXHQlcyIgJSAnXG5cdCcuam9pbignKCVzKVx0JXMnICUoTiwgJycuam9pbihyZXN1bHRbTl0pKSBmb3IgTiBpbiByYW5nZShtaW5OLG1heE4pKSk=