fork download
  1. import re
  2.  
  3. pdfContent = "\n\nBlah blah.\n\nDate: 2022-01-31\n\nOptional line here which sometimes does not show\n\nAmount: 123.45\n\n2: Blah blah.\n"
  4.  
  5. RE = re.compile(
  6. r"Date:\s+(\S+)(?:.*?"
  7. r"(Optional line here which sometimes does not show))?.*?"
  8. r"Amount:\s+(?P<amount>\S+)",
  9. re.DOTALL)
  10.  
  11. matches = RE.search(pdfContent)
  12. date = matches.group(1)
  13. optional = matches.group(2)
  14. amount = matches.group("amount")
  15.  
  16. print(f"date = {date}")
  17. print(f"optional = {optional}")
  18. print(f"amount = {amount}")
Success #stdin #stdout 0.03s 9440KB
stdin
Standard input is empty
stdout
date     = 2022-01-31
optional = Optional line here which sometimes does not show
amount   = 123.45