import re
# Sample full names used to demonstrate the splitting behaviour.
# "special split": the name contains a lowercase particle (da/de/do/dos/das)
# that must stay attached to the word following it.
# "normal split": the name is simply split on whitespace.
test1 = "Francisco da Sousa Rodrigues"  # special split
test2 = "Emiliano Rodrigo Carrasco"  # normal split
test3 = "Alberto de Francia"  # special split
test4 = "Bruno Rezende"  # normal split

# Matches one name token: an optional particle followed by whitespace, then a
# run of non-whitespace characters.  Every match consumes a whole token, so the
# particle alternatives are only ever tried at the start of a word; "do" vs
# "dos" is resolved by backtracking because the particle must be followed by
# whitespace.  The match is case-sensitive (only lowercase particles bind).
PATTERN = re.compile(r'(?:(?:da|de|do|dos|das)\s+)?\S+')

# Print the token list for each sample, one list per line.  The single-argument
# call form of print is used because it parses and prints identically on both
# Python 2 and Python 3, whereas the bare ``print expr`` statement is a
# SyntaxError on Python 3.
for _sample in (test1, test2, test3, test4):
    print(PATTERN.findall(_sample))
aW1wb3J0IHJlCgp0ZXN0MSA9ICJGcmFuY2lzY28gZGEgU291c2EgUm9kcmlndWVzIiAjc3BlY2lhbCBzcGxpdAp0ZXN0MiA9ICJFbWlsaWFubyBSb2RyaWdvIENhcnJhc2NvIiAjbm9ybWFsIHNwbGl0CnRlc3QzID0gIkFsYmVydG8gZGUgRnJhbmNpYSIgI3NwZWNpYWwgc3BsaXQKdGVzdDQgPSAiQnJ1bm8gUmV6ZW5kZSIgI25vcm1hbCBzcGxpdAoKUEFUVEVSTiA9IHJlLmNvbXBpbGUocicoPzooPzpkYXxkZXxkb3xkb3N8ZGFzKVxzKyk/XFMrJykKCnByaW50IHJlLmZpbmRhbGwoUEFUVEVSTiwgdGVzdDEpCgpwcmludCByZS5maW5kYWxsKFBBVFRFUk4sIHRlc3QyKQoKcHJpbnQgcmUuZmluZGFsbChQQVRURVJOLCB0ZXN0MykKCnByaW50IHJlLmZpbmRhbGwoUEFUVEVSTiwgdGVzdDQpCgo=