# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup as bs
import requests
import re
import time
import threading
import string
import random
import subprocess
from typing import Optional
import sys
import uuid
import warnings
if not sys.warnoptions:
warnings.simplefilter("ignore")
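# Ask for Russian-localized pages; Parser.pars_mails parses the Russian
# 'тыс'/'млн' subscriber-count abbreviations this produces.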
headers = {'accept-language':'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7'}
dsaqwe = 'Adqw'  # marker string printed by o() before exiting
request = []  # search keywords
all_emails = []  # emails collected so far (shared across worker threads)
blacklist = ['@mail.ru']  # email domains to skip
WORDS = 80  # number of words to auto-generate
start = 0  # first URL index for the next worker thread
end = 100  # last URL index (exclusive) for the next worker thread
min_subs = 15000  # minimum subscriber count to keep a channel
max_subs = 250000  # maximum subscriber count to keep a channel
def wnd() -> Optional[uuid.UUID]:
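    """Return this machine's hardware UUID via WMIC (Windows only), or None
    if it cannot be determined."""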
try:
txt = subprocess.check_output("wmic csproduct get uuid").decode()
match = re.search(r"\bUUID\b[\s\r\n]+([^\s\r\n]+)", txt)
if match is not None:
txt = match.group(1)
if txt is not None:
txt = re.sub(r"[^0-9A-Fa-f]+", "", txt)
if len(txt) == 32:
return uuid.UUID(txt)
    except Exception:
        pass
return None
def J():
    """Appears to be a license gate: fetch the clients page and report whether
    this machine's hardware UUID is listed there (not called in this script)."""
    response = requests.get('https://f...content-available-to-author-only...e.com/clients')
    page = bs(response.content, 'html.parser')
    clients = page.find('p', attrs={'class': 'font_9'}).text
    return str(wnd()) in clients
def o():
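    """Print the marker string and exit; appears to be paired with the J()
    license check, though neither is called in this script."""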
print(dsaqwe)
time.sleep(8)
sys.exit(0)
def save_error(exception, where):
    """Append an error and its context to errors_log.txt."""
    with open('errors_log.txt', 'a', encoding='utf-8') as res_errors:
        res_errors.write(f'error occurred - {exception} | {where}\n\n')
class Parser:
    def __init__(self):
        pass
def get_urls_to_pars(self, request):
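        """Run a YouTube search for every keyword in `request` and collect
        /watch URLs from the results pages. Relies on the legacy
        (non-JavaScript) results markup."""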
session = requests.Session()
all_urls = []
for j in request:
try:
urls_content = session.get(f'https://w...content-available-to-author-only...e.com/results?search_query={j}&sp=EgQIBBAB', headers=headers)
urls_bs = bs(urls_content.text,'html.parser')
urls = urls_bs.find_all('a', attrs={'dir':'ltr'})
for i in urls:
if '/watch' in i['href']:
all_urls.append(i['href'])
            except (requests.exceptions.ProxyError, requests.exceptions.ConnectionError) as err:
                print(f'Connection error. You may be sending too many requests | {err}')
            except Exception as err1:
                save_error(err1, 'Error while collecting links')
return all_urls
def pars_mails(self, urls, start, end):
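        """Visit urls[start:end], pull the first email-looking string from each
        video page, filter by subscriber count and the domain blacklist, and
        append hits to mails.txt, mails_full.txt and mails_db.txt.

        NOTE: several threads run this concurrently; access to the shared
        all_emails list is not locked, so occasional duplicates are possible.
        """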
session = requests.Session()
for i in range(start, end):
try:
email_content = session.get(f'https://w...content-available-to-author-only...e.com{urls[i]}', headers=headers)
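                # Grab the first email-looking token in the page source
                # (literal '\n' escape sequences are stripped first).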
pattern = r"[\w\.-]+@[\w\.-]+"
mail = re.search(pattern,email_content.text.replace("\\n", ""))
if mail and mail[0] not in all_emails:
                    subs = bs(email_content.text, 'html.parser')
                    subs_count = subs.find('span', attrs={'class': 'yt-subscription-button-subscriber-count-branded-horizontal yt-subscriber-count'})
                    subi = subs_count["aria-label"]
                    channel_id = subs.find('meta', attrs={'itemprop': 'channelId'}).get('content')
                    channel_url = 'https://w...content-available-to-author-only...e.com/channel/' + channel_id
                    # The page is served in Russian (see headers), so counts
                    # use 'тыс' (thousand) and 'млн' (million); fractions are
                    # truncated, e.g. '12,3 тыс.' -> 12000.
                    if 'тыс' in subi and ',' in subi:
                        subi = int(subi[:subi.find(',')].replace('\xa0', '')) * 1000
                    elif 'тыс' in subi:
                        subi = int(subi[:subi.find('т')].replace('\xa0', '')) * 1000
                    elif 'млн' in subi and ',' in subi:
                        subi = int(subi[:subi.find(',')].replace('\xa0', '')) * 1000000
                    elif 'млн' in subi:
                        subi = int(subi[:subi.find('м')].replace('\xa0', '')) * 1000000
                    try:
                        # Plain counts come through as digit strings, possibly
                        # with non-breaking spaces as thousands separators.
                        subi = int(str(subi).replace('\xa0', ''))
                    except ValueError:
                        pass
                    if min_subs <= subi <= max_subs and mail[0] not in all_emails and '.' in mail[0]:
                        # Check the blacklist before recording the address, so a
                        # rejected email never enters all_emails.
                        if any(j in mail[0] for j in blacklist):
                            continue
                        all_emails.append(mail[0])
                        results = open('mails.txt', 'a')
                        results2 = open('mails_full.txt', 'a')
                        results3 = open('mails_db.txt', 'a')
print(f'subs - {subi}, email - {mail[0]}, url - {channel_url}')
                        try:
                            results.write(f'{mail[0]}\n')
                        except Exception as errorfile:
                            save_error(errorfile, 'Error writing to mails.txt')
                        try:
                            results2.write('=' * 15 + '\n')
                            results2.write(f'subs - {subi}, email - {mail[0]}, url = {channel_url}\n')
                            results2.write('=' * 15 + '\n')
                            results2.write('\n')
                        except Exception as errorfile2:
                            save_error(errorfile2, 'Error writing to mails_full.txt')
                        try:
                            results3.write(f'{mail[0]}\n')
                        except Exception as error3:
                            save_error(error3, 'Error writing to mails_db.txt')
results.close()
results2.close()
results3.close()
            except Exception as err:
                save_error(err, 'Error while processing a video page')
                time.sleep(0.8)
                subi = 0
                channel_url = ''
                continue
def get_words(self, num):
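        """Collect `num` random English words by scraping random pages of an
        online English-Russian dictionary."""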
        current_words = []
        # Approximate number of dictionary pages available per starting letter;
        # letters not listed below default to 50 pages.
        pages_per_letter = {'x': 7, 'z': 11, 'q': 19, 'd': 65, 'e': 52,
                            'j': 25, 'k': 25, 'y': 25, 'n': 42, 'u': 42, 'v': 33}
        while len(current_words) < num:
            try:
                random_word = random.choice(string.ascii_letters)
                max_amount = pages_per_letter.get(random_word.lower(), 50)
                random_numb = random.randint(1, max_amount)
url = f'https://w...content-available-to-author-only...a.ru/английский-русский/{random_word}/{random_numb}'
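                # verify=False skips TLS certificate checks; the warnings filter
                # at the top of the file silences the resulting urllib3 warnings.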
words_cn = requests.get(url, verify=False)
wrds = bs(words_cn.content, 'html.parser')
all_wrds = wrds.find('div', attrs={'class': 'container'}).find_all_next('li')
                words_in_page = []
                for i in all_wrds:
                    # Keep only entries from the English column of the
                    # English-Russian dictionary page.
                    if 'Английский' in i.text and 'Русский' not in i.text:
                        words_in_page.append(i.text.replace('Английский', ''))
                for i in range(0, 3):
                    current_words.append(random.choice(words_in_page))
                print(f'{len(current_words)} of {num}')
            except Exception:
                continue
        return current_words
if __name__ == '__main__':
parse = Parser()
    print('Choose a mode: 1 - auto-generate words and search for emails, 2 - load words from requests.txt [DO NOT FORGET to create requests.txt and put your search words in it], 3 - endless YouTube email parsing')
mode = input('')
if mode == "1":
while True:
try:
wrds_to_gen = int(input('Сколько слов сгенерировать?'))
request = parse.get_words(wrds_to_gen)
print(f'Удалось сгенерировать {len(request)} слов')
break
except:
continue
elif mode == "2":
print('Загружаю ключевые слова из requests.txt...')
try:
f = open('requests.txt', encoding='utf-8')
lines = f.readlines()
num_lines = sum(1 for line in open('requests.txt', encoding='utf-8'))
for i in range(num_lines):
request.append(lines[i].replace('\n',''))
except:
print("Ошибка, возможно вы не создали файл requests.txt")
elif mode =="3":
print('Выбран режим бесконечного парсинга почт с ютуба...')
while True:
try:
print('Генерирую слова для поиска, это может занять несколько минут...')
request = parse.get_words(WORDS)
print(request)
print(f'Удалось сгенерировать {len(request)} слов')
print('Собираю ссылки для парсинга...')
urls = parse.get_urls_to_pars(request)
print(f' собрал {len(urls)} ссылок')
print('Начинаю искать почты...')
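                # Split the URL list into chunks of 100: one worker thread per
                # chunk, plus one thread for the trailing remainder.
                # NOTE: threads are never joined, so the next loop pass can
                # begin while earlier workers are still running.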
THREADS = len(urls) // 100
for index in range(THREADS):
threading.Thread(target=parse.pars_mails, args=(urls, start, end)).start()
start += 100
end += 100
threading.Thread(target=parse.pars_mails, args=(urls, start, len(urls))).start()
start = 0
end = 100
time.sleep(10)
            except Exception as error:
                save_error(error, 'General error')
                print(error)
                time.sleep(3)
                start = 0
                end = 100
                continue
    # Modes 1 and 2 fall through to here; mode 3 loops forever above.
    print(request)
    print('Collecting links to parse...')
    urls = parse.get_urls_to_pars(request)
    print(f'Collected {len(urls)} links')
    print('Starting to search for emails...')
    THREADS = len(urls) // 100
    for index in range(THREADS):
        threading.Thread(target=parse.pars_mails, args=(urls, start, end)).start()
        start += 100
        end += 100
    threading.Thread(target=parse.pars_mails, args=(urls, start, len(urls))).start()