#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from functions import *
from bs4 import BeautifulSoup
import http.cookiejar as cookielib
import codecs
import os
import requests
def filterProblems(problems1, problems2):
    """Remove from ``problems1`` every key that also appears in ``problems2``.

    ``problems1`` is modified in place and the same object is returned.
    Keys of ``problems2`` that are absent from ``problems1`` are ignored.
    """
    for key in problems2:
        # pop() with a default tolerates missing and repeated keys.
        problems1.pop(key, None)
    return problems1
class Extractor( object ) :
def __init__ ( self ) :
self .s = requests.session ( )
self .lang = ""
self .folders = [ ]
self .header = [ ]
self .tokenFields = "41dcc576d590c4d85784392529146d228d160ebf%3A"
def extractProblems( self , soup, problems) :
itens = soup.find_all ( 'table' ) [ 0 ] .find_all ( 'tbody' ) [ 0 ] .find_all ( 'tr' )
for tr in itens:
#print(tr)
td = tr.find_all ( 'td' )
if len ( td) != 8 :
return
else :
problems[ td[ 0 ] .find ( 'a' ) .text .strip ( ) ] = {
"id_code" : td[ 2 ] .find ( 'a' ) .text .strip ( ) ,
"name" : td[ 3 ] .find ( 'a' ) .text .strip ( ) ,
"language" : td[ 4 ] .text .strip ( )
}
def folderStructure( self ) :
if not os .path .exists ( './URI-' +self .lang .upper ( ) +'/' ) :
os .makedirs ( './URI-' +self .lang .upper ( ) +'/' )
for name in self .folders :
if not os .path .exists ( './URI-' +self .lang .upper ( ) +'/' +name) :
os .makedirs ( './URI-' +self .lang .upper ( ) +'/' +name)
def login( self ) :
username = input ( '\n \n Login e-mail: ' )
password = input ( 'Password: ' )
self .tokenFields = self .getPage ( "https://w...content-available-to-author-only...m.br/judge/pt/login" ) .find ( "input" , { "name" : "_Token[fields]" } ) .attrs [ 'value' ]
payload_login = {
'_method' : 'POST' ,
'_csrfToken' : self .s .cookies [ 'csrfToken' ] ,
'email' : username,
'password' : password,
'remember_me' : '0' ,
'_Token[fields]' : self .tokenFields ,
'_Token[unlocked]' : ''
}
r = self .s .post ( "https://w...content-available-to-author-only...m.br/judge/pt/login" , data= payload_login)
if len ( r.history ) == 0 :
print ( "Invalid login. Try Again" )
return False
self .lang = r.url .split ( '/' ) [ -1 ]
self .email = username
self .author = self .getPage ( 'https://w...content-available-to-author-only...m.br/judge/' +self .lang +'/account' ) .find ( "input" , { "name" : "username" } ) .attrs [ 'value' ]
if self .lang == 'pt' :
self .folders = [ "INICIANTE" , "AD-HOC" , "STRINGS" , "ESTRUTURAS E BIBLIOTECAS" , "MATEMÁTICA" , "PARADIGMAS" , "GRAFOS" , "GEOMETRIA COMPUTACIONAL" , "SQL" ]
self .header = [ " Autor: " +self .author + "<" +self .email +">" , " Nome: " , " Nível: " , " Categoria: " ]
return [ "Login inválido" , "\n Logado com sucesso\n " , "Estrutura de pastas criada" , "Criando lista de download" , "Lista de download criada: " , " problemas" , "\n Baixando códigos" , "\n Códigos baixados com sucesso" , "Nome completo: " , "Email de contato: " ]
elif self .lang == 'en' :
self .folders = [ "BEGINNER" , "AD-HOC" , "STRINGS" , "DATA STRUCTURES AND LIBRARIES" , "MATHEMATICS" , "PARADIGMS" , "GRAPH" , "COMPUTATIONAL GEOMETRY" , "SQL" ]
self .header = [ " Author: " +self .author + "<" +self .email +">" , " Name: " , " Level: " , " Category: " ]
return [ "Invalid login" , "\n Successfully logged in\n " , "Folder structure created" , "Creating download list" , "Download list created: " , " problems" , "\n Downloading Codes" , "\n Codes successfully downloaded" , "Full name: " , "Email contact: " ]
elif self .lang == 'es' :
self .folders = [ "INICIANTE" , "AD-HOC" , "STRINGS" , "ESTRUTURAS E BIBLIOTECAS" , "MATEMÁTICA" , "PARADIGMAS" , "GRAFOS" , "GEOMETRIA COMPUTACIONAL" , "SQL" ]
self .header = [ " Autor: " +self .author + "<" +self .email +">" , " Nome: " , " Nível: " , " Categoria: " ]
return [ "Login inválido" , "\n Logado com sucesso\n " , "Estrutura de pastas criada" , "Criando lista de download" , "Lista de download criada: " , " problemas" , "\n Baixando códigos" , "\n Códigos baixados com sucesso" , "Nome completo: " , "Email de contato: " ]
def getPage( self , url) :
return BeautifulSoup( self .s .get ( url) .content , 'html.parser' )
def getUriProblems( self ) :
qt = int ( self .getPage ( 'https://w...content-available-to-author-only...m.br/judge/' +self .lang +'/runs?answer_id=1' ) .find ( "div" , { "id" : "table-info" } ) .text .split ( " " ) [ 6 ] )
#print(qt)
problems = { }
for i in range ( qt, 0 , -1 ) :
self .extractProblems ( self .getPage ( 'https://w...content-available-to-author-only...m.br/judge/' +self .lang +'/runs?answer_id=1&page=' +str ( i) ) , problems)
#print(len(problems))
return problems
def getLocalProblems( self ) :
problems = [ ]
for path in os .listdir ( './URI-' +self .lang .upper ( ) +'/' ) :
for arq in os .listdir ( './URI-' +self .lang .upper ( ) +'/' +path+'/' ) :
problems.append ( arq.rsplit ( '.' , 1 ) [ 0 ] .split ( '-' ) [ 0 ] .strip ( ) )
#print(problems)
return problems
def infoProblem( self , id ) :
menu = self .getPage ( 'https://w...content-available-to-author-only...m.br/judge/' +self .lang +'/problems/view/' +id ) .find ( "div" , { "id" : "problem-menu" } )
#print("KKKK----", menu)
return {
"level" : "1" ,
"category" : "Beginner"
}
def getCode( self , id ) :
print ( ( self .getPage ( 'https://w...content-available-to-author-only...m.br/judge/' +self .lang +'/runs/code/' +id ) .find ( "pre" , { "id" : "code" } ) ) )
def cleanName( self , name) :
return name.replace ( "\\ " , "" ) .replace ( "/" , "" ) .replace ( ":" , "" ) .replace ( "?" , "" ) .replace ( "\" " , "" ) .replace ( "<" , "" ) .replace ( ">" , "" ) .replace ( "|" , "" )
def downloadCodes( self , problems) :
for id in problems:
if self .lang == "pt" :
print ( "\t Problema " +id +" baixado" )
else :
print ( "\t Problem " +id +" downloaded" )
info = self .infoProblem ( id )
code = self .getCode ( problems[ id ] [ "id_code" ] )
extension = ""
comment = "//"
if problems[ id ] [ "language" ] == "C++" :
extension = ".cpp"
elif problems[ id ] [ "language" ] == "C" :
extension = ".c"
elif problems[ id ] [ "language" ] == "Java" :
extension = ".java"
else :
extension = ".py"
comment = "#"
arq = codecs .open ( "./URI-" +self .lang .upper ( ) +"/" +info[ "category" ] +"/" +id +" - " +self .cleanName ( problems[ id ] [ "name" ] ) +extension, "w" , "utf-8" )
arq.write ( comment+self .header [ 0 ] +"\n " )
arq.write ( comment+self .header [ 1 ] +problems[ id ] [ "name" ] +"\n " )
arq.write ( comment+self .header [ 2 ] +info[ "level" ] +"\n " )
arq.write ( comment+self .header [ 3 ] +info[ "category" ] +"\n " )
arq.write ( comment+" URL: https://w...content-available-to-author-only...m.br/judge/" +self .lang +"/problems/view/" +id +"\n \n " )
arq.write ( code )
arq.write ( "\n " )
arq.close ( )
extractor = Extractor()

# login() returns False when the credentials are rejected; keep prompting
# until it returns the list of localized status messages.
message = extractor.login()
while message is False:
    message = extractor.login()

print(message[1])

extractor.folderStructure()
print(message[2])

print(message[3])
# Download only the problems that do not already have a local file.
download_list = filterProblems(extractor.getUriProblems(), extractor.getLocalProblems())
print(message[4]+str(len(download_list))+message[5])

print(message[6])
extractor.downloadCodes(download_list)
print(message[7])
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from functions import *
from bs4 import BeautifulSoup
import http.cookiejar as cookielib
import codecs
import os
import requests

def filterProblems(problems1, problems2):
    """Remove from ``problems1`` every key that also appears in ``problems2``.

    ``problems1`` is modified in place and the same object is returned.
    Keys of ``problems2`` that are absent from ``problems1`` are ignored.
    """
    for already_present in problems2:
        # pop() with a default is a no-op for missing or repeated keys.
        problems1.pop(already_present, None)
    return problems1

class Extractor(object):
    def __init__(self):
        self.s = requests.session()
        self.lang = ""
        self.folders = []
        self.header = []
        self.tokenFields = "41dcc576d590c4d85784392529146d228d160ebf%3A"
            
    def extractProblems(self, soup, problems):
        itens = soup.find_all('table')[0].find_all('tbody')[0].find_all('tr')
        for tr in itens:
            #print(tr)
            td = tr.find_all('td')
            if len(td) != 8:
                return 
            else:                
                problems[td[0].find('a').text.strip()] = {
                    "id_code": td[2].find('a').text.strip(),
                    "name": td[3].find('a').text.strip(),
                    "language": td[4].text.strip()
                }

    def folderStructure(self):
        if not os.path.exists('./URI-'+self.lang.upper()+'/'):
            os.makedirs('./URI-'+self.lang.upper()+'/')
        for name in self.folders:
            if not os.path.exists('./URI-'+self.lang.upper()+'/'+name):
                os.makedirs('./URI-'+self.lang.upper()+'/'+name)

    def login(self):
        username = input('\n\nLogin e-mail: ')
        password = input('Password: ')
        self.tokenFields = self.getPage("https://w...content-available-to-author-only...m.br/judge/pt/login").find("input", {"name": "_Token[fields]"}).attrs['value']
        payload_login = {
            '_method': 'POST',
            '_csrfToken': self.s.cookies['csrfToken'],
            'email': username,
            'password': password,
            'remember_me': '0',
            '_Token[fields]': self.tokenFields,
            '_Token[unlocked]': ''
        }

        r = self.s.post("https://w...content-available-to-author-only...m.br/judge/pt/login", data=payload_login)

        if len(r.history) == 0:
            print("Invalid login. Try Again")
            return False

        self.lang = r.url.split('/')[-1]

        self.email = username
        self.author = self.getPage('https://w...content-available-to-author-only...m.br/judge/'+self.lang+'/account').find("input", {"name": "username"}).attrs['value']

        if self.lang == 'pt':
            self.folders = ["INICIANTE", "AD-HOC", "STRINGS", "ESTRUTURAS E BIBLIOTECAS", "MATEMÁTICA", "PARADIGMAS", "GRAFOS", "GEOMETRIA COMPUTACIONAL", "SQL"]
            self.header = [" Autor: "+self.author+ "<"+self.email+">", " Nome: ", " Nível: ", " Categoria: "]
            return ["Login inválido", "\nLogado com sucesso\n", "Estrutura de pastas criada", "Criando lista de download", "Lista de download criada: ", " problemas", "\nBaixando códigos", "\nCódigos baixados com sucesso", "Nome completo: ", "Email de contato: "]
        elif self.lang == 'en':
            self.folders = ["BEGINNER", "AD-HOC", "STRINGS", "DATA STRUCTURES AND LIBRARIES", "MATHEMATICS", "PARADIGMS", "GRAPH", "COMPUTATIONAL GEOMETRY", "SQL"]
            self.header = [" Author: "+self.author+ "<"+self.email+">", " Name: ", " Level: ", " Category: "]
            return ["Invalid login", "\nSuccessfully logged in\n", "Folder structure created", "Creating download list", "Download list created: ", " problems", "\nDownloading Codes", "\nCodes successfully downloaded", "Full name: ", "Email contact: "]
        elif self.lang == 'es':
            self.folders = ["INICIANTE", "AD-HOC", "STRINGS", "ESTRUTURAS E BIBLIOTECAS", "MATEMÁTICA", "PARADIGMAS", "GRAFOS", "GEOMETRIA COMPUTACIONAL", "SQL"]
            self.header = [" Autor: "+self.author+ "<"+self.email+">", " Nome: ", " Nível: ", " Categoria: "]
            return ["Login inválido", "\nLogado com sucesso\n", "Estrutura de pastas criada", "Criando lista de download", "Lista de download criada: ", " problemas", "\nBaixando códigos", "\nCódigos baixados com sucesso", "Nome completo: ", "Email de contato: "]


    def getPage(self, url):
        return BeautifulSoup(self.s.get(url).content, 'html.parser')

    def getUriProblems(self):
        qt = int(self.getPage('https://w...content-available-to-author-only...m.br/judge/'+self.lang+'/runs?answer_id=1').find("div", {"id": "table-info"}).text.split(" ")[6])
        #print(qt)
        problems = {}
        for i in range(qt, 0, -1):
            self.extractProblems(self.getPage('https://w...content-available-to-author-only...m.br/judge/'+self.lang+'/runs?answer_id=1&page='+str(i)), problems)
            #print(len(problems))
        return problems

    def getLocalProblems(self):
        problems = []
        for path in os.listdir('./URI-'+self.lang.upper()+'/'):
            for arq in os.listdir('./URI-'+self.lang.upper()+'/'+path+'/'):
                problems.append(arq.rsplit('.', 1)[0].split('-')[0].strip())
        #print(problems)
        return problems

    def infoProblem(self, id):
        menu = self.getPage('https://w...content-available-to-author-only...m.br/judge/'+self.lang+'/problems/view/'+id).find("div", {"id": "problem-menu"})
        #print("KKKK----", menu)
        return {
            "level": "1",
            "category": "Beginner"
        }

    def getCode(self, id):
        print((self.getPage('https://w...content-available-to-author-only...m.br/judge/'+self.lang+'/runs/code/'+id).find("pre", {"id": "code"})))

    def cleanName(self, name):
        return name.replace("\\", "").replace("/", "").replace(":", "").replace("?", "").replace("\"", "").replace("<", "").replace(">", "").replace("|", "")

    def downloadCodes(self, problems):
        for id in problems:
            if self.lang == "pt":
                print("\tProblema "+id+" baixado")
            else:
                print("\tProblem "+id+" downloaded")
            info = self.infoProblem(id)
            code = self.getCode(problems[id]["id_code"])
            extension = ""
            comment = "//"
            if problems[id]["language"] == "C++":
                extension = ".cpp"
            elif problems[id]["language"] == "C":
                extension = ".c"
            elif problems[id]["language"] == "Java":
                extension = ".java"
            else:
                extension = ".py"
                comment = "#"

            arq = codecs.open("./URI-"+self.lang.upper()+"/"+info["category"]+"/"+id+" - "+self.cleanName(problems[id]["name"])+extension, "w", "utf-8")
            arq.write(comment+self.header[0]+"\n")
            arq.write(comment+self.header[1]+problems[id]["name"]+"\n")
            arq.write(comment+self.header[2]+info["level"]+"\n")
            arq.write(comment+self.header[3]+info["category"]+"\n")
            arq.write(comment+" URL: https://w...content-available-to-author-only...m.br/judge/"+self.lang+"/problems/view/"+id+"\n\n")
            arq.write(code)
            arq.write("\n")
            arq.close()
            
extractor = Extractor()

# login() returns False when the credentials are rejected; keep prompting
# until it returns the list of localized status messages.
message = extractor.login()
while message is False:
    message = extractor.login()

print(message[1])

extractor.folderStructure()
print(message[2])

print(message[3])
# Download only the problems that do not already have a local file.
download_list = filterProblems(extractor.getUriProblems(), extractor.getLocalProblems())
print(message[4]+str(len(download_list))+message[5])

print(message[6])
extractor.downloadCodes(download_list)
print(message[7])