fork download
  1. import os
  2. from bs4 import BeautifulSoup
  3. import asyncio
  4. import aiohttp
  5. import aiofiles
  6. import sys
  7. import nest_asyncio
  8. nest_asyncio.apply()
  9.  
  10. path_to_data = os.path.join(os.getenv('SOME_PATH'), 'folder1')
  11. if not os.path.exists(path_to_data):
  12. os.makedirs(path_to_data)
  13.  
  14. async def fetch_data(session: aiohttp.ClientSession, url: str):
  15. async with session.get(url) as response:
  16. return await response.text()
  17.  
  18. async def make_soup(page):
  19. return BeautifulSoup(page, "html.parser").select('a[href] img')
  20.  
  21. async def save_images(session: aiohttp.ClientSession, img_name, img_link) -> None:
  22. async with session.get(img_link) as response:
  23. img_content = await response.read()
  24. async with aiofiles.open(os.path.join(path_to_data, img_name + ".png"), mode='wb') as imagefile:
  25. await imagefile.write(img_content)
  26.  
  27. async def take_image(session: aiohttp.ClientSession, thread_address: str, thread_number: int) -> None:
  28. thread_data = await fetch_data(session, thread_address)
  29. image_data = await make_soup(thread_data)
  30. image_number = 1
  31. for img in image_data:
  32. temp = img.find_parent('a', href=True)['href']
  33. try:
  34. if temp[:1] == "/":
  35. image = "https://s...content-available-to-author-only...e.com" + temp
  36. else:
  37. image = temp
  38. filename = str(thread_number + 1) + '-' + str(image_number)
  39. await save_images(session, filename, image)
  40. image_number += 1
  41. except:
  42. continue
  43.  
  44. async def main():
  45. with open('C:\Test.txt', 'r') as file:
  46. head = [next(file) for x in range(3)]
  47. async with aiohttp.ClientSession() as session:
  48. futures = [take_image(session, line, number) for number, line in enumerate(head)]
  49. #loop = asyncio.get_event_loop()
  50. loop = asyncio.new_event_loop()
  51. loop.run_until_complete(asyncio.wait(futures))
  52. ##loop.run_until_complete(futures)
  53. #return await asyncio.gather(*futures)
  54.  
  55. if __name__ == "__main__":
  56. if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
  57. asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
  58. asyncio.run(main())
Runtime error #stdin #stdout #stderr 0.17s 23276KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "./prog.py", line 2, in <module>
ModuleNotFoundError: No module named 'bs4'