fork download
  1. import re
  2.  
  3.  
  4. day = r'(?:((?:0?[1-9]|[12]\d|3[01])(?:\s*(?:st|[rn]d|th))?)\s*)?'
  5. month = r'(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
  6. year = r'(\d{2}(?:\d{2})?)'
  7. rx_valid = re.compile( fr'\b{day}{month}\s*{year}\s*[-—–]\s*{day}{month}\s*{year}(?!\d)', re.IGNORECASE )
  8. rx_ordinal = re.compile( r'\s*\d+\s*(?:st|[rn]d|th)', re.IGNORECASE )
  9.  
  10. lst = [
  11. 'July 2014 - 28th August 2014',
  12. 'Jan 2012 - 3rd sep 2014',
  13. 'Jan 2008 - May 2012',
  14. 'Jan 2008 and May 2012'
  15. ]
  16. for i in lst:
  17. word = rx_valid.finditer(i)
  18. for match in word:
  19. print(rx_ordinal.sub("", match.group()))
Success #stdin #stdout 0.03s 9772KB
stdin
Standard input is empty
stdout
July 2014 - August 2014
Jan 2012 - sep 2014
Jan 2008 - May 2012