import re
# Sample sentence to scan. Deliberately not named `str`, so the builtin
# type stays usable in this module and in anything that star-imports it.
text = 'Lorem IPSUM is simply DUMMY text of the printing and typesetting INDUSTRY'

# A whole word made of four or more uppercase ASCII letters. The capture
# group matters: it makes re.split() keep each matched word in its output.
re1 = r'\b([A-Z]{4,})\b'
# Up to four words, each with optional leading whitespace. The pattern can
# match the empty string, so re.search() always succeeds and always matches
# at the very start of the string it is given.
re2 = r'(?:\s*\w+\b){,4}'

# Because of the capture group, the split alternates
#   [segment, WORD, segment, WORD, ..., segment]
# i.e. plain-text segments at even indices, uppercase words at odd indices.
arr = re.split(re1, text)

# Pair every uppercase word with the segment before it and the segment
# after it, trimming each segment to at most four words via re2.
# NOTE: both contexts are read from the *start* of their segment, so when
# the preceding segment is longer than four words the "before" context is
# its first four words, not the four words nearest the match.
result = [
    (re.search(re2, before).group(), word, re.search(re2, after).group())
    for before, word, after in zip(arr[::2], arr[1::2], arr[2::2])
]

# Function-call form prints the same list under Python 2 and Python 3.
print(result)
aW1wb3J0IHJlCgpzdHIgPSAnTG9yZW0gSVBTVU0gaXMgc2ltcGx5IERVTU1ZIHRleHQgb2YgdGhlIHByaW50aW5nIGFuZCB0eXBlc2V0dGluZyBJTkRVU1RSWScKCnJlMSA9IHInXGIoW0EtWl17NCx9KVxiJwpyZTIgPSByJyg/OlxzKlx3K1xiKXssNH0nCgphcnIgPSByZS5zcGxpdChyZTEsIHN0cikKCnJlc3VsdCA9IFtdCgpmb3IgaSBpbiByYW5nZShsZW4oYXJyKSk6CglpZiBpICUgMjoKCQlyZXN1bHQuYXBwZW5kKCAocmUuc2VhcmNoKHJlMiwgYXJyW2ktMV0pLmdyb3VwKCksIGFycltpXSwgcmUuc2VhcmNoKHJlMiwgYXJyW2krMV0pLmdyb3VwKCkpICkKCgpwcmludCByZXN1bHQ=
[('Lorem', 'IPSUM', ' is simply'), (' is simply', 'DUMMY', ' text of the printing'), (' text of the printing', 'INDUSTRY', '')]