# encoding: utf-8

# Counts how often each run of Cyrillic letters occurs in INPUT_PATH and
# prints the most frequent ones, least frequent first, one "word => count"
# line per word. Counting is case-sensitive.
Encoding.default_external = Encoding::UTF_8

# File whose text is analysed.
INPUT_PATH = "x.html"

# A "word" is a maximal run of characters from the Cyrillic script.
CYRILLIC_WORD = /\p{Cyrillic}+/i

# Words that are never reported, regardless of how often they occur.
STOP_WORDS = ["Каришка", "Липцис", "Андрей", "Воронин", "Пересланные", "сообщения", "что", "меня", "мне", "как"].freeze

# Words shorter than this many characters are not reported.
MIN_WORD_LENGTH = 3

# Words occurring more often than this are not reported.
MAX_COUNT = 30000

# Maximum number of words printed.
TOP_LIMIT = 500

# Tallies every Cyrillic word in +text+.
#
# The pattern cannot match a newline, so scanning the whole text yields the
# same words as scanning it line by line.
#
# @param text [String] text to analyse
# @return [Hash{String => Integer}] occurrence count per word
def count_cyrillic_words(text)
  counts = Hash.new(0)
  text.scan(CYRILLIC_WORD) { |word| counts[word] += 1 }
  counts
end

# Filters a tally and returns its most frequent entries.
#
# @param counts [Hash{String => Integer}] occurrence count per word
# @param stop_words [Array<String>] words to drop
# @param min_length [Integer] minimum word length to keep
# @param max_count [Integer] maximum count to keep
# @param limit [Integer] maximum number of entries returned
# @return [Array<Array(String, Integer)>] [word, count] pairs sorted by
#   ascending count, at most +limit+ of them
def top_words(counts, stop_words: STOP_WORDS, min_length: MIN_WORD_LENGTH,
              max_count: MAX_COUNT, limit: TOP_LIMIT)
  counts
    # The stop list holds words, so it must be checked against the word (the
    # hash key), not against its count.
    .reject { |word, count| stop_words.include?(word) || word.length < min_length || count > max_count }
    .sort_by { |_word, count| count }
    .last(limit)
end

if File.file?(INPUT_PATH)
  top_words(count_cyrillic_words(File.read(INPUT_PATH))).each do |word, count|
    p "#{word} => #{count}"
  end
else
  warn "#{INPUT_PATH}: input file not found"
end