import re

# Matches a run of four or more identical word characters: group 1 captures
# one character and the back-references require at least three more copies.
# A plain raw string is used instead of the Python-2-only ``ur''`` prefix,
# which is a syntax error on Python 3.
# NOTE: re.MULTILINE only changes how ``^`` and ``$`` behave; this pattern has
# no anchors, so the flag does not influence the matches. It is kept so the
# compiled pattern's flags stay as originally written.
p = re.compile(r'(\w)\1\1\1+', re.MULTILINE)

# Sample text, written as adjacent literals (one per embedded line) that the
# parser concatenates into a single string.
test_str = (
    u'aaaaaadgfdhgfbbbbgggee)ehje ekekkkkkeefh ehfeff88888éh\"hr ekkKKKKK\n'
    u'jhxjhjhhhhh\n'
    u'the the\n'
    u'\"eeeeeeeeaaaabcaaaaadexxxdddzdzzzz\"\n'
    u'\"eeeeeeeeaaaabcaaaaadexxxdddzdzzzz\"\n'
    u'\"eeeeeeeeaaaabcaaaaadexxxdddzdzzzz\"\n'
    u'\"aaaeeeeeeeeaaaadzxzzzzffffrrrr\"\n'
    u'\"aaa\"'
)

# Print the full text of every non-overlapping match, in order of appearance.
print([x.group(0) for x in p.finditer(test_str)])
Standard input is empty
[u'aaaaaa', u'bbbb', u'kkkkk', u'88888', u'KKKKK', u'hhhhh', u'eeeeeeee', u'aaaa', u'aaaaa', u'zzzz', u'eeeeeeee', u'aaaa', u'aaaaa', u'zzzz', u'eeeeeeee', u'aaaa', u'aaaaa', u'zzzz', u'eeeeeeee', u'aaaa', u'zzzz', u'ffff', u'rrrr']