"""Extract ``KEY: value`` pairs from a flat, whitespace-separated record.

A key is one or more ALL-CAPS words followed by a colon; the value is the
text that follows, up to the next capitalised word or the end of the line.
"""
import re

# ``[^\S\r\n]`` means "horizontal whitespace": any whitespace character that
# is neither CR nor LF.
#
# Pattern layout:
#   \b([A-Z]+(?:<hs>+[A-Z]+)*)   group 1 - key: ALL-CAPS words
#   :<hs>+                       colon and the separator after it
#   ([A-Z0-9].*?                 group 2 - value: starts with a capital
#                                letter or a digit, matched lazily
#   (?=<hs>+[A-Z]|<hs>*$))       ...until whitespace + capital, or the end
#
# The terminating lookahead accepts a *run* of horizontal whitespace (the
# earlier form only accepted exactly one literal space), so fields separated
# by several blanks, or input with trailing blanks, no longer leak that
# whitespace into the captured value.
#
# NOTE: a value still ends at the first whitespace followed by a capital
# letter, so a value such as "Party Time" is captured as "Party".
regex = (
    r"\b([A-Z]+(?:[^\S\r\n]+[A-Z]+)*)"
    r":[^\S\r\n]+"
    r"([A-Z0-9].*?(?=[^\S\r\n]+[A-Z]|[^\S\r\n]*$))"
)

# Compiled once so repeated calls do not go through the pattern-cache lookup.
_FIELD_RE = re.compile(regex)

test_str = "COMPANY NAME: Ruff name of company TYPE OF EVENT: Party NOTIFIED DATE: 1/27/20 COMPANY NAME: Company2/CPT TYPE OF EVENT: Fire NOTIFIED DATE: 1/31/20"


def extract_fields(text: str) -> list:
    """Return every ``(key, value)`` pair found in *text*, in order.

    The result is a list of 2-tuples of strings, exactly what
    ``re.findall`` yields for a pattern with two capturing groups.
    An input without any ``KEY: value`` field gives an empty list.
    """
    return _FIELD_RE.findall(text)


print(extract_fields(test_str))
Standard input is empty
[('COMPANY NAME', 'Ruff name of company'), ('TYPE OF EVENT', 'Party'), ('NOTIFIED DATE', '1/27/20 '), ('COMPANY NAME', 'Company2/CPT'), ('TYPE OF EVENT', 'Fire'), ('NOTIFIED DATE', '1/31/20')]