import re

# Pattern for one log record (the layout resembles sudo log entries --
# NOTE(review): confirm the producer of these logs). Used with re.MULTILINE.
#
#   group 1: timestamp, e.g. "Aug 7 14:14:43"
#   group 2: user name
#   group 3: everything after "COMMAND=", including continuation lines
#
# A record starts with a date header and may wrap over several lines.
# The scan for "COMMAND=" starts on the header line itself and never crosses
# a line that begins with a new date header, so a record whose COMMAND= is on
# its first line (or that has no COMMAND= at all) cannot be paired with the
# command of the following record.
regex = (
    # Header: "<Mon> <day> <hh:mm:ss> : <user> :"
    r"^(\w{3}\s+\d+\s\d+:\d+:\d+)\s?:\s?(\w+)\s+:"
    # Rest of the header line, stopping right before "COMMAND=" if present.
    r"(?:(?!COMMAND=).)*"
    # Continuation lines of the same record (not starting with a date header),
    # each consumed up to "COMMAND=" or end of line.
    r"(?:\r?\n(?!\w{3}\s+\d+\s\d+)(?:(?!COMMAND=).)*)*"
    # The command itself plus any wrapped continuation lines.
    r"COMMAND=(.*(?:\r?\n(?!\w{3}\s+\d+\s\d+).*)*)"
)

test_str = (
    "Aug 7 14:14:43 : user1 : TTY=pts/53 ; PWD=/path2 ;\n"
    " USER=root ; COMMAND=/path/to/cmd1\n"
    "Aug 7 14:14:49 : user2 : TTY=pts/53 ; PWD=/usr/home ;\n"
    " USER=root ; COMMAND=./myscript.sh -m name -o SCHEDULER\n"
    "Aug 7 14:15:14 : user3 : TTY=pts/34 ;\n"
    " PWD=/path ; USER=root ;\n"
    " COMMAND=/usr/bin/egrep ^[a-z]*\n"
    " /filename/toto1234\n"
    "Aug 7 14:15:37 : user4 : TTY=unknown ; PWD=/opt/nagios ; USER=root ;\n"
    " COMMAND=/path/to/less\n"
    " /var/opt/otherfile\n"
    "Aug 7 14:16:04 : user4 : TTY=pts/34 ;\n"
    " PWD=/usr/local/bin/script ; USER=root ;\n"
    " COMMAND=/usr/bin/egrep ^[a-z]*\n"
    " /user/local/sbin/tata"
)


def extract_commands(text):
    """Return a list of (timestamp, user, command) tuples found in ``text``.

    ``text`` is the raw log content; records may span several lines.
    The command keeps its embedded newlines when it wraps onto continuation
    lines. Records without a ``COMMAND=`` field are skipped. An empty string
    yields an empty list.
    """
    return re.findall(regex, text, re.MULTILINE)


print(extract_commands(test_str))
# Captured run of the script above:
# Standard input is empty
# [('Aug 7 14:14:43', 'user1', '/path/to/cmd1'), ('Aug 7 14:14:49', 'user2', './myscript.sh -m name -o SCHEDULER'), ('Aug 7 14:15:14', 'user3', '/usr/bin/egrep ^[a-z]*\n /filename/toto1234'), ('Aug 7 14:15:37', 'user4', '/path/to/less\n /var/opt/otherfile'), ('Aug 7 14:16:04', 'user4', '/usr/bin/egrep ^[a-z]*\n /user/local/sbin/tata')]