fork download
  1. import re
  2.  
  3. test1 = "Francisco da Sousa Rodrigues" #special split
  4. test2 = "Emiliano Rodrigo Carrasco" #normal split
  5. test3 = "Alberto de Francia" #special split
  6. test4 = "Bruno Rezende" #normal split
  7.  
  8. PATTERN = re.compile(r'(?:(?:da|de|do|dos|das)\s+)?\S+')
  9.  
  10. print re.findall(PATTERN, test1)
  11.  
  12. print re.findall(PATTERN, test2)
  13.  
  14. print re.findall(PATTERN, test3)
  15.  
  16. print re.findall(PATTERN, test4)
  17.  
  18.  
Success #stdin #stdout 0.02s 6832KB
stdin
Standard input is empty
stdout
['Francisco', 'da Sousa', 'Rodrigues']
['Emiliano', 'Rodrigo', 'Carrasco']
['Alberto', 'de Francia']
['Bruno', 'Rezende']