import re
# Sample input: one syslog-prefixed nginx access-log line. Note that the line
# is embedded in a stringified one-element list (leading "['" / trailing "']"),
# which is why the pattern below is applied with search() rather than match().
sentences = ['[\'Jan 31 19:28:14 nginx: 10.0.0.0 - - [31/Jan/2019:19:28:14 +0100] "POST /test/itf/ HTTP/x.x" 404 146 "-" "Mozilla/5.2 [en] (X11, U; OpenVAS-XX 9.2.7)"\']']

# The pattern is assembled from adjacent raw-string literals (implicit
# concatenation), so the compiled regex is exactly one contiguous pattern;
# the per-piece comments show what each part captures in the sample line.
rx = re.compile(
    r'\b(\w{3})\s+'                  # 1: three-character word, e.g. "Jan"
    r'(\d{1,2})\s+'                  # 2: one or two digits, e.g. "31"
    r'(\d{1,2}:\d{1,2}:\d{2})\s+'    # 3: clock time, e.g. "19:28:14"
    r'(\w+)\W+'                      # 4: word before the separator, e.g. "nginx"
    r'(\d{1,3}(?:\.\d{1,3}){3})'     # 5: dotted quad, e.g. "10.0.0.0"
    r'(?:\s+\S+){2}\s+'              # two skipped tokens ("- -" in the sample)
    r'\[([^][\s]+)\s+'               # 6: bracketed timestamp up to the space
    r'([+\d]+)]\s+'                  # 7: digits/plus before "]", e.g. "+0100"
    r'"([A-Z]+)\s+'                  # 8: upper-case verb, e.g. "POST"
    r'(\S+)\s+'                      # 9: request target, e.g. "/test/itf/"
    r'(\S+)"\s+'                     # 10: last request token, e.g. "HTTP/x.x"
    r'(\d+)\s+'                      # 11: first number, e.g. "404"
    r'(\d+)\s+'                      # 12: second number, e.g. "146"
    r'\S+\s+'                        # one skipped token ('"-"' in the sample)
    r'"([^"]*)"'                     # 13: final quoted string (may be empty)
)

# One list of the 13 captured fields per line that matches the pattern.
words = []
for sent in sentences:
    m = rx.search(sent)
    if m:
        words.append(list(m.groups()))
    # Lines that do not match are skipped. To keep them instead, add an
    # ``else`` branch with a fallback tokenizer, for example
    # ``words.append(nltk.word_tokenize(sent))`` (requires nltk).

print(words)
aW1wb3J0IHJlCgpzZW50ZW5jZXMgPSBbJ1tcJ0phbiAzMSAxOToyODoxNCBuZ2lueDogMTAuMC4wLjAgLSAtIFszMS9KYW4vMjAxOToxOToyODoxNCArMDEwMF0gIlBPU1QgL3Rlc3QvaXRmLyBIVFRQL3gueCIgNDA0IDE0NiAiLSIgIk1vemlsbGEvNS4yIFtlbl0gKFgxMSwgVTsgT3BlblZBUy1YWCA5LjIuNykiXCddJ10KCnJ4ID0gcmUuY29tcGlsZShyJ1xiKFx3ezN9KVxzKyhcZHsxLDJ9KVxzKyhcZHsxLDJ9OlxkezEsMn06XGR7Mn0pXHMrKFx3KylcVysoXGR7MSwzfSg/OlwuXGR7MSwzfSl7M30pKD86XHMrXFMrKXsyfVxzK1xbKFteXVtcc10rKVxzKyhbK1xkXSspXVxzKyIoW0EtWl0rKVxzKyhcUyspXHMrKFxTKykiXHMrKFxkKylccysoXGQrKVxzK1xTK1xzKyIoW14iXSopIicpCgp3b3Jkcz1bXQoKZm9yIHNlbnQgaW4gc2VudGVuY2VzOgoJbSA9IHJ4LnNlYXJjaChzZW50KQoJaWYgbToKCQl3b3Jkcy5hcHBlbmQobGlzdChtLmdyb3VwcygpKSkKCWVsc2U6CgkJcGFzcyAjd29yZHMuYXBwZW5kKG5sdGsud29yZF90b2tlbml6ZShzZW50KSAgIyB1bmNvbW1lbnQgaW4geW91ciBjb2RlCgpwcmludCh3b3Jkcyk=
[['Jan', '31', '19:28:14', 'nginx', '10.0.0.0', '31/Jan/2019:19:28:14', '+0100', 'POST', '/test/itf/', 'HTTP/x.x', '404', '146', 'Mozilla/5.2 [en] (X11, U; OpenVAS-XX 9.2.7)']]