fork download
  1. import queue
  2. import concurrent.futures
  3. import urllib.request
  4.  
  # Pages the script attempts to download, one task per entry.
  # NOTE(review): most hosts contain "content-available-to-author-only", which
  # looks like redaction applied by the paste site rather than real host
  # names -- confirm and restore the intended URLs before relying on results.
  URLS = [
      'https://w...content-available-to-author-only...s.com/',
      'https://w...content-available-to-author-only...n.com/',
      'https://e...content-available-to-author-only...j.com/',
      'https://w...content-available-to-author-only...o.uk/',
      'https://s...content-available-to-author-only...n.com/',
      'https://google.com',
      'https://p...content-available-to-author-only...n.org',
  ]
  13.  
  def load_url(url, timeout, q_success, q_fail):
      """Download the page at ``url`` and put the outcome on one of the queues.

      Args:
          url: Address passed to ``urllib.request.urlopen``.
          timeout: Timeout forwarded to ``urlopen``.
          q_success: Queue that receives ``(url, data)`` when the download works.
          q_fail: Queue that receives ``(url, exception)`` when anything fails.

      Returns:
          The response body on success, ``None`` on failure.
      """
      try:
          with urllib.request.urlopen(url, timeout=timeout) as conn:
              data = conn.read()
          q_success.put_nowait((url, data))
          return data
      except Exception as exc:
          # Deliberately broad: every failure is reported through q_fail
          # instead of being raised, so the caller can list all failed URLs.
          q_fail.put_nowait((url, exc))
          return None
  23.  
  # Queues through which load_url reports its outcome: (url, data) pairs on
  # success, (url, exception) pairs on failure.
  q_success = queue.Queue()
  q_fail = queue.Queue()

  # This context manager creates a pool of threads that execute the tasks.
  # The threads are created and shut down correctly; the manager does all of
  # that work. A pool is very useful: with 100 URLs it would be a very bad
  # idea to start 100 threads and run them all at the same time.
  with concurrent.futures.ThreadPoolExecutor(max_workers=13) as executor:
      for url in URLS:
          # 5 is the timeout argument passed to load_url for each download.
          executor.submit(load_url, url, 5, q_success, q_fail)

  # Leaving the ``with`` block waits for the submitted tasks to finish, so
  # both queues are complete before they are drained below.
  print(f"\n Sucessfully loaded: {q_success.qsize()} url(s)")
  while q_success.qsize():
      url, data = q_success.get()
      print(f"url [{url}], content size: {len(data)}")

  print(f"\n Failed urls: {q_fail.qsize()} url(s)")
  while q_fail.qsize():
      url, exc = q_fail.get()
      print(f"url [{url}], exception: {exc}")
  44.  
  45.  
Success #stdin #stdout 0.06s 18372KB
stdin
Standard input is empty
stdout
 Sucessfully loaded: 0 url(s)

 Failed urls: 7 url(s)
url [https://p...content-available-to-author-only...n.org], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://w...content-available-to-author-only...s.com/], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://w...content-available-to-author-only...o.uk/], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://s...content-available-to-author-only...n.com/], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://w...content-available-to-author-only...n.com/], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://e...content-available-to-author-only...j.com/], exception: <urlopen error [Errno -3] Temporary failure in name resolution>
url [https://google.com], exception: <urlopen error [Errno -3] Temporary failure in name resolution>