from collections import Counter
import chardet
def check_encoding(fname):
    """Detect the character encoding of the file at *fname*.

    The whole file is read as raw bytes and passed to ``chardet.detect``.

    Args:
        fname: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    # Use a context manager so the handle is closed even if read() raises;
    # a bare open(...).read() leaves closing to the garbage collector.
    with open(fname, "rb") as raw_file:
        rawdata = raw_file.read()
    result = chardet.detect(rawdata)
    return result['encoding']
def top_10(f):
    """Return the ten most frequent words longer than six characters.

    Args:
        f: An iterable of text lines (for example an open text file).

    Returns:
        A list of up to ten ``(word, count)`` tuples ordered from most to
        least frequent. Words are whitespace-separated tokens, compared
        exactly as written (no case folding, no punctuation stripping).
    """
    counts = Counter()
    for line in f:
        # split() with no arguments already discards leading/trailing
        # whitespace, so no separate strip() is needed.
        counts.update(word for word in line.split() if len(word) > 6)
    return counts.most_common(10)
# Input files to analyse.
news = ["newsafr.txt", "newscy.txt", "newsfr.txt", "newsit.txt"]

# Encodings reported by check_encoding for each file, in the same order as
# ``news``. Nothing below consumes this list; the name is kept so it remains
# available at module level.
ecod = [check_encoding(enc) for enc in news]

# Encoding used to decode each file when it is opened as text.
news_dict = {
    "newsafr.txt": "utf-8",
    "newscy.txt": "ascii",
    "newsfr.txt": "ISO-8859-5",
    "newsit.txt": "windows-1251",
}

# Dicts iterate in insertion order, so the files are processed and printed in
# the order they are listed above.
for fname, encoding in news_dict.items():
    with open(fname, "r", encoding=encoding) as f:
        print(*top_10(f))
ZnJvbSBjb2xsZWN0aW9ucyBpbXBvcnQgQ291bnRlcgppbXBvcnQgY2hhcmRldAoKCmRlZiBjaGVja19lbmNvZGluZyhmbmFtZSk6CiAgICByYXdkYXRhID0gb3BlbihmbmFtZSwgInJiIikucmVhZCgpCiAgICByZXN1bHQgPSBjaGFyZGV0LmRldGVjdChyYXdkYXRhKQogICAgcmV0dXJuIHJlc3VsdFsnZW5jb2RpbmcnXQoKZGVmIHRvcF8xMChmKToKICAgIGFycmF5ID0gW2xpbmUuc3RyaXAoKS5zcGxpdCgpIGZvciBsaW5lIGluIGZdCiAgICBkYXRhID0gW10KICAgIG0gPSAoKQogICAgZm9yIGkgaW4gcmFuZ2UobGVuKGFycmF5KSk6CiAgICAgICAgZm9yIGogaW4gcmFuZ2UobGVuKGFycmF5W2ldKSk6CiAgICAgICAgICAgIGlmIGxlbihhcnJheVtpXVtqXSkgPiA2OgogICAgICAgICAgICAgICAgZGF0YS5hcHBlbmQoYXJyYXlbaV1bal0pCiAgICBwID0gQ291bnRlcihkYXRhKQogICAgbSA9IChwLm1vc3RfY29tbW9uKDEwKSkKICAgIHJldHVybiBtCgoKbmV3cyA9IFsibmV3c2Fmci50eHQiLCAibmV3c2N5LnR4dCIsICJuZXdzZnIudHh0IiwgIm5ld3NpdC50eHQiXQplY29kID0gW10KZm9yIGVuYyBpbiBuZXdzOgogICAgZWNvZC5hcHBlbmQoY2hlY2tfZW5jb2RpbmcoZW5jKSkKbmV3c19kaWN0ID0geyJuZXdzYWZyLnR4dCI6InV0Zi04IiwKICAgICAgICAgICAgICJuZXdzY3kudHh0IjoiYXNjaWkiLAogICAgICAgICAgICAgIm5ld3Nmci50eHQiOiJJU08tODg1OS01IiwKICAgICAgICAgICAgICJuZXdzaXQudHh0Ijoid2luZG93cy0xMjUxIgogICAgICAgICAgICAgfQoKCgp3aXRoIG9wZW4oIm5ld3NhZnIudHh0IiwgInIiLCBlbmNvZGluZz0idXRmLTgiKSBhcyBmOgogICAgICAgIHByaW50KCoodG9wXzEwKGYpKSkKd2l0aCBvcGVuKCJuZXdzY3kudHh0IiwgInIiLCBlbmNvZGluZz0iYXNjaWkiKSBhcyBmOgogICAgICAgIHByaW50KCoodG9wXzEwKGYpKSkKd2l0aCBvcGVuKCJuZXdzZnIudHh0IiwgInIiLCBlbmNvZGluZz0iSVNPLTg4NTktNSIpIGFzIGY6CiAgICBwcmludCgqKHRvcF8xMChmKSkpCndpdGggb3BlbigibmV3c2l0LnR4dCIsICJyIiwgZW5jb2Rpbmc9IndpbmRvd3MtMTI1MSIpIGFzIGY6CiAgICBwcmludCgqKHRvcF8xMChmKSkpCgoKCg==