fork download
  1. # -*- coding: UTF-8 -*-
  2. from bs4 import BeautifulSoup as bs
  3. from bs4 import BeautifulSoup as biusdjAKq
  4. import requests as NjOooJsj
  5. import requests
  6. import re
  7. import time
  8. import threading
  9. import string
  10. import random
  11. import subprocess
  12. from typing import Optional
  13. import sys
  14. import uuid
  15. import warnings
  16.  
  # Suppress every warning unless warning options were supplied to the interpreter.
  if not sys.warnoptions:
      warnings.simplefilter(action="ignore")

  # Headers attached to the search-page and watch-page requests.
  headers = {
      'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
  }

  # Text printed by o() before it terminates the process.
  dsaqwe = 'Adqw'

  # Search keywords, and every address accepted so far (shared module state).
  request = []
  all_emails = []

  # Substrings that disqualify an address from being written to the result files.
  blacklist = ['@mail.ru']

  # Number of words for auto-generation.
  WORDS = 80

  # Default index window handed to the first worker thread.
  start, end = 0, 100

  # Inclusive subscriber-count bounds a channel must satisfy.
  min_subs, max_subs = 15000, 250000
  29.  
  def wnd() -> Optional[uuid.UUID]:
      """Return the UUID printed by ``wmic csproduct get uuid``, or ``None``.

      Returns:
          A ``uuid.UUID`` built from the 32 hexadecimal digits that follow the
          ``UUID`` header in the command output, or ``None`` when the command
          cannot be run, its output cannot be decoded, the header is missing,
          or the value does not contain exactly 32 hexadecimal digits.
      """
      try:
          # An argument list needs no shell parsing and fails with a plain
          # OSError when the executable does not exist on this machine.
          txt = subprocess.check_output(["wmic", "csproduct", "get", "uuid"]).decode()
      except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
          return None

      match = re.search(r"\bUUID\b[\s\r\n]+([^\s\r\n]+)", txt)
      if match is None:
          # Without the header there is no value to parse; do not try to
          # interpret the whole command output as a UUID.
          return None

      # Drop dashes and any other non-hex characters before validating length.
      hex_digits = re.sub(r"[^0-9A-Fa-f]+", "", match.group(1))
      if len(hex_digits) != 32:
          return None
      return uuid.UUID(hex_digits)
  48.  
  def J():
      """Download the clients page and look for this machine's UUID in it.

      The text of the first ``<p class="font_9">`` element is compared with
      ``str(wnd())``.  The result of that comparison does not change the
      outcome: the function returns ``True`` in both cases.
      """
      response = NjOooJsj.get('https://f...content-available-to-author-only...e.com/clients')
      page = biusdjAKq(response.content, 'html.parser')
      listed_ids = page.find('p', attrs={'class': 'font_9'}).text
      machine_id = str(wnd())
      # The membership test is still evaluated, but both outcomes yield True.
      return True if machine_id in listed_ids else True
  58.  
  def o():
      """Print the ``dsaqwe`` message, wait eight seconds, then exit with status 0."""
      print(dsaqwe)
      # Pause before terminating the process.
      time.sleep(8)
      raise SystemExit(0)
  63.  
  def save_error(exception, where):
      """Append one error record to ``errors_log.txt`` in the working directory.

      Args:
          exception: The error (or any object) whose string form is logged.
          where: Short description of the place the error occurred.

      The file is opened as UTF-8 explicitly because the record contains
      Cyrillic text, which not every platform default encoding can represent.
      """
      # The context manager closes the file; no explicit close() is needed.
      with open('errors_log.txt', 'a', encoding='utf-8') as res_errors:
          res_errors.write(f'произошла ошибка - {exception} |{where}\n\n')
  68.  
  class Parser():
      """Collects watch-page links, scans those pages for e-mail addresses,
      and generates random search keywords.

      The methods rely on module-level state defined elsewhere in this file:
      ``headers``, ``all_emails``, ``blacklist``, ``min_subs``, ``max_subs``
      and ``save_error``.
      """

      # Compiled once instead of on every page.
      _EMAIL_RE = re.compile(r"[\w\.-]+@[\w\.-]+")

      # Leading number of a subscriber label: digits (optionally grouped with
      # spaces / no-break spaces), an optional ",fraction", an optional unit.
      _SUBS_RE = re.compile(r"\s*(\d(?:[\d\xa0 ]*\d)?)(?:,(\d+))?[\xa0 ]*(тыс|млн)?")
      _SUBS_UNITS = {'тыс': 1000, 'млн': 1000000}

      # Upper bound of the random number appended to the dictionary URL, per
      # first letter (presumably the number of pages available - TODO confirm).
      _DEFAULT_MAX_AMOUNT = 50
      _MAX_AMOUNT_BY_LETTER = {
          'x': 7, 'z': 11, 'q': 19, 'd': 65, 'e': 52,
          'j': 25, 'k': 25, 'y': 25, 'n': 42, 'u': 42, 'v': 33,
      }

      def __init__(self):
          # The parser keeps no per-instance state.
          pass

      @classmethod
      def _parse_subscriber_label(cls, label):
          """Convert a subscriber label such as ``'15,5 тыс. подписчиков'`` to an int.

          The fractional part is honoured (``'15,5 тыс'`` -> 15500) instead of
          being truncated, and a label without a unit is read as a plain number.

          Raises:
              ValueError: if the label does not start with a number.
          """
          found = cls._SUBS_RE.match(label)
          if found is None:
              raise ValueError(f'unrecognised subscriber label: {label!r}')
          whole, fraction, unit = found.groups()
          multiplier = cls._SUBS_UNITS.get(unit, 1)
          value = int(whole.replace('\xa0', '').replace(' ', '')) * multiplier
          if fraction and unit:
              # Integer arithmetic keeps the result exact: ",5" of 1000 is 500.
              value += int(fraction) * multiplier // 10 ** len(fraction)
          return value

      @staticmethod
      def _save_result(address, subi, channel_url):
          """Append one accepted address to the three result files.

          Each file is handled independently so a failure on one is logged
          without leaking a handle or skipping the remaining files.
          """
          rule = '=' * 15 + '\n'
          outputs = (
              ('mails.txt', f'{address}\n',
               'Ошибка, связанна с файлом errorfile'),
              ('mails_full.txt',
               f'{rule}subs - {subi}, email - {address}, url = {channel_url}\n{rule}\n',
               'Ошибка, связанна с файлом errorfile2'),
              ('mails_db.txt', f'{address}\n',
               'Ошибка, связанна с файлом errorfile3'),
          )
          for path, text, where in outputs:
              try:
                  with open(path, 'a', encoding='utf-8') as out:
                      out.write(text)
              except (OSError, UnicodeError) as file_err:
                  save_error(file_err, where)

      def get_urls_to_pars(self, request):
          """Return the ``/watch`` hrefs found on the search page of every keyword.

          Args:
              request: Iterable of search keywords.

          Returns:
              List of href strings containing ``/watch``, in discovery order.
              Connection problems are printed, any other error is logged via
              ``save_error``; in both cases the next keyword is processed.
          """
          session = requests.Session()
          all_urls = []
          for j in request:
              try:
                  # Passing the query through ``params`` lets requests
                  # URL-encode keywords containing '&', '#', spaces, etc.
                  urls_content = session.get(
                      'https://w...content-available-to-author-only...e.com/results',
                      params={'search_query': j, 'sp': 'EgQIBBAB'},
                      headers=headers,
                  )
                  urls_bs = bs(urls_content.text, 'html.parser')
                  for link in urls_bs.find_all('a', attrs={'dir': 'ltr'}):
                      # An anchor without href must not abort the whole page.
                      href = link.get('href', '')
                      if '/watch' in href:
                          all_urls.append(href)
              except (requests.exceptions.ProxyError, requests.exceptions.ConnectionError) as err:
                  print(f'Ошибка с соединением. Возможно вы посылаете много запросов | {err}')
              except Exception as err1:
                  save_error(err1, 'Ошибка во время сбора ссылок')
                  continue
          return all_urls

      def pars_mails(self, urls, start, end):
          """Scan ``urls[start:end]`` for e-mail addresses and record accepted ones.

          For each page the first address-like match is taken.  It is accepted
          when it has not been seen before, contains a dot, and the subscriber
          count read from the page lies within ``[min_subs, max_subs]``.
          Accepted addresses are added to ``all_emails``; those not matching
          ``blacklist`` are printed and appended to the result files.

          Args:
              urls: Sequence of path strings appended to the site root.
              start: First index to process.
              end: Index one past the last to process (clamped to ``len(urls)``).
          """
          session = requests.Session()
          # Clamp the window so a short list does not cost an IndexError,
          # a log entry and a sleep for every missing index.
          end = min(end, len(urls))

          for i in range(start, end):
              try:
                  email_content = session.get(f'https://w...content-available-to-author-only...e.com{urls[i]}', headers=headers)
                  mail = self._EMAIL_RE.search(email_content.text.replace("\\n", ""))
                  if not mail or mail[0] in all_emails:
                      continue
                  address = mail[0]

                  subs = bs(email_content.text, 'html.parser')
                  subs_count = subs.find('span', attrs={'class': 'yt-subscription-button-subscriber-count-branded-horizontal yt-subscriber-count'})
                  subi = self._parse_subscriber_label(subs_count["aria-label"])

                  channel_url = subs.find('meta', attrs={'itemprop': 'channelId'}).get('content')
                  channel_url = 'https://w...content-available-to-author-only...e.com/channel/' + channel_url

                  if not (min_subs <= subi <= max_subs):
                      continue
                  if address in all_emails or '.' not in address:
                      continue

                  # Remember the address even when it is blacklisted, so later
                  # pages carrying the same address are skipped early.
                  all_emails.append(address)
                  if any(entry in address for entry in blacklist):
                      continue

                  print(f'subs - {subi}, email - {address}, url - {channel_url}')
                  self._save_result(address, subi, channel_url)
              except Exception as err:
                  # Best effort: log the failure, pause briefly, go on.
                  save_error(err, 'Ошибка, связанна с парсингом почт')
                  time.sleep(0.8)
                  continue

      def get_words(self, num):
          """Collect ``num`` random words from the online dictionary pages.

          A random ASCII letter and a random number (bounded per letter) select
          a page; up to three random entries of that page are taken per round
          until ``num`` words have been gathered.

          Args:
              num: Number of words to return.

          Returns:
              List of exactly ``num`` words (empty when ``num`` <= 0).
          """
          current_words = []
          while len(current_words) < num:
              try:
                  random_word = random.choice(string.ascii_letters)
                  # Looked up afresh every round so a low limit chosen for one
                  # letter does not stick to the letters drawn afterwards.
                  max_amount = self._MAX_AMOUNT_BY_LETTER.get(
                      random_word.lower(), self._DEFAULT_MAX_AMOUNT)
                  random_numb = random.randint(1, max_amount)
                  url = f'https://w...content-available-to-author-only...a.ru/английский-русский/{random_word}/{random_numb}'
                  # NOTE(review): verify=False disables TLS certificate checks.
                  words_cn = requests.get(url, verify=False)
                  wrds = bs(words_cn.content, 'html.parser')
                  container = wrds.find('div', attrs={'class': 'container'})
                  if container is None:
                      continue
                  words_in_page = []
                  for item in container.find_all_next('li'):
                      text = item.text
                      if 'Английский' in text and 'Русский' not in text:
                          words_in_page.append(text.replace('Английский', ''))
                  if not words_in_page:
                      continue
                  # Never collect more than was asked for.
                  for _ in range(min(3, num - len(current_words))):
                      current_words.append(random.choice(words_in_page))
                  print(f'{len(current_words)} из {num}')
              except Exception as err:
                  # Log and pause instead of silently retrying in a tight loop.
                  save_error(err, 'Ошибка во время генерации слов')
                  time.sleep(0.8)
                  continue
          return current_words
  194.  
  195.  
  def _start_mail_threads(parser, urls, chunk=100):
      """Start one ``pars_mails`` thread per ``chunk`` URLs plus one for the rest.

      Args:
          parser: Object exposing ``pars_mails(urls, start, end)``.
          urls: Sequence of URLs to split between the threads.
          chunk: Number of URLs handled by each full-size thread.
      """
      lower = 0
      for _ in range(len(urls) // chunk):
          threading.Thread(target=parser.pars_mails, args=(urls, lower, lower + chunk)).start()
          lower += chunk
      # Remainder (possibly empty) goes to a final thread.
      threading.Thread(target=parser.pars_mails, args=(urls, lower, len(urls))).start()


  if __name__ == '__main__':
      parse = Parser()
      print('Выберите режим работы: 1 - Автоматическая генерация слов и поиск почт, 2 - Загрузка слов из requests.txt [НЕ ЗАБУДЬТЕ СОЗДАТЬ файл requests.txt и записать туда слова для поиска, 3 - Бесконечный парсинг почт с ютуба')
      mode = input('')
      if mode == "1":
          # Ask again until a valid integer is entered.  Only ValueError is
          # caught so Ctrl-C / EOF can still terminate the program.
          while True:
              try:
                  wrds_to_gen = int(input('Сколько слов сгенерировать?'))
              except ValueError:
                  continue
              request = parse.get_words(wrds_to_gen)
              print(f'Удалось сгенерировать {len(request)} слов')
              break
      elif mode == "2":
          print('Загружаю ключевые слова из requests.txt...')
          try:
              # Read the file once and let the context manager close it.
              with open('requests.txt', encoding='utf-8') as f:
                  request.extend(line.replace('\n', '') for line in f)
          except (OSError, UnicodeError):
              print("Ошибка, возможно вы не создали файл requests.txt")
      elif mode == "3":
          print('Выбран режим бесконечного парсинга почт с ютуба...')
          # Endless cycle: generate keywords, collect links, start the workers.
          while True:
              try:
                  print('Генерирую слова для поиска, это может занять несколько минут...')
                  request = parse.get_words(WORDS)
                  print(request)
                  print(f'Удалось сгенерировать {len(request)} слов')
                  print('Собираю ссылки для парсинга...')
                  urls = parse.get_urls_to_pars(request)
                  print(f' собрал {len(urls)} ссылок')
                  print('Начинаю искать почты...')
                  _start_mail_threads(parse, urls)
                  time.sleep(10)
              except Exception as error:
                  save_error(error, 'Генеральная ошибка')
                  print(error)
                  time.sleep(3)
                  continue

      # Modes 1 and 2 (and any unrecognised input) fall through to one pass.
      print(request)
      print('Собираю ссылки для парсинга...')
      urls = parse.get_urls_to_pars(request)
      print(f' собрал {len(urls)} ссылок')
      print('Начинаю искать почты...')
      _start_mail_threads(parse, urls)
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
Traceback (most recent call last):
  File "/usr/lib/python3.7/py_compile.py", line 143, in compile
    _optimize=optimize)
  File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "./prog.py", line 2
    from bs4 import BeautifulSoup as bs
    ^
IndentationError: unexpected indent

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/usr/lib/python3.7/py_compile.py", line 147, in compile
    raise py_exc
py_compile.PyCompileError: Sorry: IndentationError: unexpected indent (prog.py, line 2)
stdout
Standard output is empty