fork(4) download
  1. # http://stackoverflow.com/q/33181434/5290909
  2. # Capture every character repeated N times
  3. import re
  4.  
  5. text = 'abcdbcdcdd'
  6. minN = 2
  7. maxN = 5
  8.  
  9. # init
  10. maxN += 1 #maxN as upper bound
  11. result = [['-' for i in range(len(text))] for i in range(maxN)] #initialize result list
  12.  
  13.  
  14. def RepeatedCharsPattern(min_repeat):
  15. # Generate pattern
  16. patt = r'(\w)(?:(?=(?:.*?\1){' + str(min_repeat) + r'})|(?='
  17. for i in range( 1, min_repeat):
  18. patt += r'(?:(?!\1).)*(\1)'
  19. patt += r'(?!.*?\1)))'
  20. return patt
  21.  
  22. # Example for N = minN to maxN (2 to 5)
  23. for N in range(minN,maxN):
  24. pattern = RepeatedCharsPattern(N)
  25. regex = re.compile(pattern)
  26.  
  27. print("Pattern: r'%s'\nN = %s\tText: '%s'" %(pattern, N, text))
  28.  
  29. # Loop matches
  30. m = 0
  31. for match in regex.finditer(text):
  32. m += 1
  33. # Loop groups
  34. for c in range(1,regex.groups + 1):
  35. char = match.group(c)
  36. if char is not None:
  37. index = match.start(c)
  38. print( '\tMatch #%s group(%s)\t[%s:%s]:\t"%s"' %(m, c, index, match.end(c), char))
  39. # add the char to a list[N][index] to keep ordered matches
  40. result[N][index] = char
  41. if m == 0:
  42. print("\tNo chars repeated %s times!" % N)
  43.  
  44. # Print all characters matched in the original order
  45. print("RESULT FOR ALL N's:\n\t%s" % '\n\t'.join('(%s)\t%s' %(N, ''.join(result[N])) for N in range(minN,maxN)))
Success #stdin #stdout 0.03s 9984KB
stdin
Standard input is empty
stdout
Pattern: r'(\w)(?:(?=(?:.*?\1){2})|(?=(?:(?!\1).)*(\1)(?!.*?\1)))'
N = 2	Text: 'abcdbcdcdd'
	Match #1 group(1)	[1:2]:	"b"
	Match #1 group(2)	[4:5]:	"b"
	Match #2 group(1)	[2:3]:	"c"
	Match #3 group(1)	[3:4]:	"d"
	Match #4 group(1)	[5:6]:	"c"
	Match #4 group(2)	[7:8]:	"c"
	Match #5 group(1)	[6:7]:	"d"
	Match #6 group(1)	[8:9]:	"d"
	Match #6 group(2)	[9:10]:	"d"
Pattern: r'(\w)(?:(?=(?:.*?\1){3})|(?=(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?!.*?\1)))'
N = 3	Text: 'abcdbcdcdd'
	Match #1 group(1)	[2:3]:	"c"
	Match #1 group(2)	[5:6]:	"c"
	Match #1 group(3)	[7:8]:	"c"
	Match #2 group(1)	[3:4]:	"d"
	Match #3 group(1)	[6:7]:	"d"
	Match #3 group(2)	[8:9]:	"d"
	Match #3 group(3)	[9:10]:	"d"
Pattern: r'(\w)(?:(?=(?:.*?\1){4})|(?=(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?!.*?\1)))'
N = 4	Text: 'abcdbcdcdd'
	Match #1 group(1)	[3:4]:	"d"
	Match #1 group(2)	[6:7]:	"d"
	Match #1 group(3)	[8:9]:	"d"
	Match #1 group(4)	[9:10]:	"d"
Pattern: r'(\w)(?:(?=(?:.*?\1){5})|(?=(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?:(?!\1).)*(\1)(?!.*?\1)))'
N = 5	Text: 'abcdbcdcdd'
	No chars repeated 5 times!
RESULT FOR ALL N's:
	(2)	-bcdbcdcdd
	(3)	--cd-cdcdd
	(4)	---d--d-dd
	(5)	----------