from re import finditer
from collections import Counter
from sys import argv
# Lines of text to tally.  To process a real file instead of this built-in
# sample, replace the list with an open text file, e.g.:
#     data = open(argv[1], 'rt', encoding='cp1251')
data = ['abcd, def -ref +\n', 'abcd+(ref)\n', 'abcd\n']

# A word is a maximal run of characters that are neither whitespace nor one
# of the listed punctuation marks.  Written as a raw string so that ``\s`` is
# passed to the regex engine untouched instead of being treated as an
# (invalid) string escape sequence.
WORD_PATTERN = r'''[^-+().,:\s;><"'?!]+'''

# Maps each word to the number of times it occurs across all of ``data``.
wordcount = Counter()
for line in data:
    wordcount.update(match.group() for match in finditer(WORD_PATTERN, line))

# Print "word count" pairs, most frequent first; words with equal counts
# appear in reverse alphabetical order (i.e. (count, word) sorted descending).
for word, count in sorted(
    wordcount.items(),
    key=lambda item: (item[1], item[0]),
    reverse=True,
):
    print(word, count)
ZnJvbSByZSBpbXBvcnQgZmluZGl0ZXIKZnJvbSBjb2xsZWN0aW9ucyBpbXBvcnQgQ291bnRlcgpmcm9tIHN5cyBpbXBvcnQgYXJndgoKI2RhdGEgPSBvcGVuKGFyZ3ZbMV0sICdydCcsIGVuY29kaW5nPSdjcDEyNTEnKQpkYXRhID0gWydhYmNkLCBkZWYgLXJlZiArXG4nLCAnYWJjZCsocmVmKVxuJywgJ2FiY2RcbiddCgp3b3JkY291bnQgPSBDb3VudGVyKCkKZm9yIGxpbmUgaW4gZGF0YToKICAgIGZvciB3b3JkIGluIGZpbmRpdGVyKCdbXi0rKCkuLDpcczs+PCJcJz8hXSsnLCBsaW5lKToKICAgICAgICB3ID0gd29yZC5ncm91cCgpCiAgICAgICAgI3ByaW50KHcpCiAgICAgICAgd29yZGNvdW50W3ddICs9IDEKZm9yIGNvdW50LCB3b3JkIGluIHNvcnRlZCgoKGMsIHcpIGZvciB3LCBjIGluIHdvcmRjb3VudC5pdGVtcygpKSwgcmV2ZXJzZT1UcnVlKToKICAgIHByaW50KHdvcmQsIGNvdW50KQo=