fork(1) download
  1. import re
  2.  
  3. text=['one two three basic', '1 2 3 basic', '1st 2nd 3rd basic',
  4. 'one two three very basic', '1 2 3 very basic', '1st 2nd 3rd very basic']
  5. num_list=['one','two','three']
  6. keywords = ['basic','main','foundations']
  7. dgt_part = r'\d+(?:st|[rn]d|th)?'
  8. num_wrd_part = '(?:{})'.format( '|'.join(num_list) )
  9.  
  10. rx = re.compile(r'\b((?:{0}(?:\s+{0})*|{1}(?:\s+{1})*)\s+(?:{2})\b)|\b(?:{0}(?:\s+{0})*|{1}(?:\s+{1})*)'.format(dgt_part, num_wrd_part, '|'.join(keywords)), re.I)
  11. print(rx.pattern)
  12. for element in text:
  13. print( rx.sub(lambda x: x.group(1) or '', element).strip() )
  14.  
Success #stdin #stdout 0.02s 9532KB
stdin
Standard input is empty
stdout
\b((?:\d+(?:st|[rn]d|th)?(?:\s+\d+(?:st|[rn]d|th)?)*|(?:one|two|three)(?:\s+(?:one|two|three))*)\s+(?:basic|main|foundations)\b)|\b(?:\d+(?:st|[rn]d|th)?(?:\s+\d+(?:st|[rn]d|th)?)*|(?:one|two|three)(?:\s+(?:one|two|three))*)
one two three basic
1 2 3 basic
1st 2nd 3rd basic
very basic
very basic
very basic