fork download
  1. import re
  2. text = "Transport Department Government of NCT of Delhi\nLicence to Drive Vehicles Throughout India\n\nLicence No. : DL-0820100052000 (P) R\nN : PARMINDER PAL SINGH GILL\n\n: SHRI DARSHAN SINGH GILL\n\nDOB: 10/05/1966 BG: U\nAddress :\n\n104 SHARDA APPTT WEST ENCLAVE\nPITAMPURA DELHI 110034\n\n\n\nAuth to Drive Date of Issue\nM.CYL. 24/02/2010\nLMV-NT 24/02/2010\n\n(Holder's Sig natu re)\n\nIssue Date : 20/05/2016\nValidity(NT) : 19/05/2021 : c\nValidity(T) : NA Issuing Authority\nInvCarrNo : NA NWZ-I, WAZIRPUR"
  3. search_phrases = ['Issue Date', 'Licence No.', 'N', 'Validity(NT)']
  4. reg = r"\b({})\s*:\W*(.+)".format( "|".join(sorted(map(re.escape, search_phrases), key=len, reverse=True)) )
  5. print(reg)
  6. print(re.findall(reg, text, re.IGNORECASE))
Success #stdin #stdout 0.02s 9436KB
stdin
Standard input is empty
stdout
\b(Validity\(NT\)|Licence\ No\.|Issue\ Date|N)\s*:\W*(.+)
[('Licence No.', 'DL-0820100052000 (P) R'), ('N', 'PARMINDER PAL SINGH GILL'), ('Issue Date', '20/05/2016'), ('Validity(NT)', '19/05/2021 : c')]