fork download
  1. import re
  2.  
  3. str = 'Lorem IPSUM is simply DUMMY text of the printing and typesetting INDUSTRY'
  4.  
  5. re1 = r'\b([A-Z]{4,})\b'
  6. re2 = r'(?:\s*\w+\b){,4}'
  7.  
  8. arr = re.split(re1, str)
  9.  
  10. result = []
  11.  
  12. for i in range(len(arr)):
  13. if i % 2:
  14. result.append( (re.search(re2, arr[i-1]).group(), arr[i], re.search(re2, arr[i+1]).group()) )
  15.  
  16.  
  17. print result
Success #stdin #stdout 0.02s 6848KB
stdin
Standard input is empty
stdout
[('Lorem', 'IPSUM', ' is simply'), (' is simply', 'DUMMY', ' text of the printing'), (' text of the printing', 'INDUSTRY', '')]