fork download
  1. import re
  2.  
  3. text=['one two three basic', '1 2 3 basic', '1st 2nd 3rd basic',
  4. 'one two three very basic', '1 2 3 very basic', '1st 2nd 3rd very basic']
  5. num_list=['one','two','three']
  6. keywords = ['basic','main','foundations']
  7. dgt_part = r'\d+(?:st|[rn]d|th)?'
  8. num_wrd_part = '(?:{})'.format( '|'.join(num_list) )
  9. kwd_part = '|'.join(keywords)
  10. rx = re.compile(r'\b(?=({0}(?:\s+{0})*|{1}(?:\s+{1})*))\1(?!\s+(?:{2})\b)\s*'.format(dgt_part, num_wrd_part, kwd_part), re.I)
  11. print(rx.pattern)
  12. for element in text:
  13. print( rx.sub('', element) )
  14.  
Success #stdin #stdout 0.02s 9652KB
stdin
Standard input is empty
stdout
\b(?=(\d+(?:st|[rn]d|th)?(?:\s+\d+(?:st|[rn]d|th)?)*|(?:one|two|three)(?:\s+(?:one|two|three))*))\1(?!\s+(?:basic|main|foundations)\b)\s*
one two three basic
1 2 3 basic
1st 2nd 3rd basic
very basic
very basic
very basic