fork download
  1. import re
  2. all_domain=['com edu','.com edu','inc.', '.com', 'inc', 'com', '.edu', 'edu']
  3. all_domain.sort(key=len, reverse=True)
  4. domain_alternators = '|'.join(map(re.escape,all_domain)) # <-- HERE
  5.  
  6. print(domain_alternators)
  7. regex = re.compile(r'(?<!\w)({}|[a-z-A-Z]+)(?!\w)'.format(domain_alternators)) # <-- HERE
  8. print(regex)
  9. #re.compile('\\b(.com edu|com edu|inc.|.com|.edu|inc|com|edu|[a-z-A-Z]+)\\b')
  10.  
  11. name= 'BASIC SCHOOL DISTRICT .COM'
  12. result=regex.findall(name.lower())
  13. print(result)
Success #stdin #stdout 0.02s 9576KB
stdin
Standard input is empty
stdout
\.com\ edu|com\ edu|inc\.|\.com|\.edu|inc|com|edu
re.compile('(?<!\\w)(\\.com\\ edu|com\\ edu|inc\\.|\\.com|\\.edu|inc|com|edu|[a-z-A-Z]+)(?!\\w)')
['basic', 'school', 'district', '.com']