import re

# Sample audit log. Each record must stay on a single physical line: the
# line regex below relies on '.', which does not match across newlines.
string = """
msg=audit(1234902.147:88): pid=254 uid=1000 auid=1000 ses=3 subj=random_ex:random_ex:random_ex:d3-d3:w0.c12 30 msg='op=PAM:accounting grantors=pam_unix,pam_localuser acct="lemoney" exe="/usr/bin/grep" hostname=? a ddr=? terminal=/dev/pts/0 res=success'
msg=audit(432787023.324:77): pid=1254 uid=1000 auid=1000 ses=3 subj=random_ex:random_ex:random_ex:d3-d3:w0.c12 30 msg='op=PAM:accounting grantors=pam_unix,pam_localuser acct="lemoney" exe="/usr/bin/tail" hostname=? a ddr=? terminal=/dev/pts/0 res=success'
"""

# lines regex: one record per line, each starting with "msg="
entries = re.compile(r'^msg=.+', re.MULTILINE)

# outer regex (raw string so \w and \S are not treated as string escapes)
rx = re.compile(r"""
    ((\w+)='([^']+)')   # longer group, payload captured as group 3
    |                   # or
    (\w+=\S+)           # single items, captured as group 4
    """, re.VERBOSE)

# inner regex: key=value pairs inside a quoted payload
ry = re.compile(r"(\w+)=(\S+)")


def parse_entry(line):
    """Parse one audit record line into a dictionary.

    Plain ``key=value`` tokens become top-level items. A single-quoted
    ``key='...'`` group is parsed into its own dictionary of ``key=value``
    pairs and stored under the fixed key ``"mess"`` (so it does not collide
    with the unquoted ``msg=`` token at the start of the record). Tokens
    without an ``=`` are ignored; when a key repeats, the last one wins.

    Args:
        line: text of a single record.

    Returns:
        dict mapping keys to string values, plus ``"mess"`` mapping to a
        nested dict when a quoted group is present.
    """
    result = {}
    for match in rx.finditer(line):
        quoted = match.group(3)
        if quoted is not None:
            # First alternative matched: unpack the quoted payload.
            result["mess"] = {
                m.group(1): m.group(2) for m in ry.finditer(quoted)
            }
        else:
            # Split on the first '=' only, so values that themselves
            # contain '=' are kept whole instead of raising.
            key, _, value = match.group(4).partition('=')
            result[key] = value
    return result


for entry in entries.finditer(string):
    result = parse_entry(entry.group(0))
    print(result)
Standard input is empty
{'auid': '1000', 'ses': '3', 'msg': 'audit(1234902.147:88):', 'pid': '254', 'mess': {'exe': '"/usr/bin/grep"', 'terminal': '/dev/pts/0', 'ddr': '?', 'res': 'success', 'acct': '"lemoney"', 'hostname': '?', 'grantors': 'pam_unix,pam_localuser', 'op': 'PAM:accounting'}, 'uid': '1000', 'subj': 'random_ex:random_ex:random_ex:d3-d3:w0.c12'} {'auid': '1000', 'ses': '3', 'msg': 'audit(432787023.324:77):', 'pid': '1254', 'mess': {'exe': '"/usr/bin/tail"', 'terminal': '/dev/pts/0', 'ddr': '?', 'res': 'success', 'acct': '"lemoney"', 'hostname': '?', 'grantors': 'pam_unix,pam_localuser', 'op': 'PAM:accounting'}, 'uid': '1000', 'subj': 'random_ex:random_ex:random_ex:d3-d3:w0.c12'}