import os
from bs4 import BeautifulSoup
import asyncio
import aiohttp
import aiofiles
import sys
import nest_asyncio
# Apply the nest_asyncio patch once, at import time, before any event loop is used.
nest_asyncio.apply()

# Directory that downloaded images are written to: "folder1" under $SOME_PATH.
# NOTE: if SOME_PATH is unset, os.getenv returns None and os.path.join raises
# TypeError here, at import time.
path_to_data = os.path.join(os.getenv('SOME_PATH'), 'folder1')

# exist_ok=True replaces the separate os.path.exists() check, which was open to
# a check-then-create race; it still raises if the path exists but is not a
# directory.
os.makedirs(path_to_data, exist_ok=True)
async def fetch_data(session: aiohttp.ClientSession, url: str):
    """Issue a GET for ``url`` on ``session`` and return the response body as text.

    Args:
        session: Client session used to perform the request.
        url: Address to request.

    Returns:
        Whatever ``response.text()`` yields for the response.
    """
    async with session.get(url) as response:
        body = await response.text()
    return body
async def make_soup(page):
    """Parse ``page`` as HTML and return every ``<img>`` nested inside an ``<a href>``.

    Args:
        page: Markup handed to ``BeautifulSoup`` with the ``"html.parser"`` backend.

    Returns:
        The result of selecting ``'a[href] img'`` on the parsed document.
    """
    document = BeautifulSoup(page, "html.parser")
    linked_images = document.select('a[href] img')
    return linked_images
async def save_images(session: aiohttp.ClientSession, img_name, img_link) -> None:
    """Download ``img_link`` and write its bytes to ``<path_to_data>/<img_name>.png``.

    Args:
        session: Client session used to perform the request.
        img_name: File name without extension; ".png" is always appended.
        img_link: Address of the image to download.
    """
    async with session.get(img_link) as response:
        payload = await response.read()

    destination = os.path.join(path_to_data, img_name + ".png")
    async with aiofiles.open(destination, mode='wb') as out_file:
        await out_file.write(payload)
async def take_image(session: aiohttp.ClientSession, thread_address: str, thread_number: int) -> None:
    """Download every linked image found on one thread page.

    Fetches ``thread_address`` with ``fetch_data``, extracts the ``<img>``
    elements wrapped in ``<a href>`` with ``make_soup``, and saves each link
    target through ``save_images``. Files are named
    ``"<thread_number + 1>-<n>"``, where ``n`` starts at 1 and advances only
    after a successful save.

    Args:
        session: Client session shared by all requests.
        thread_address: Address of the thread page to scan.
        thread_number: Zero-based index of the thread; 1 is added for the
            file name.

    Raises:
        asyncio.CancelledError: Re-raised so task cancellation is honoured.
        Exception: Anything raised while fetching or parsing the thread page
            itself propagates to the caller.
    """
    thread_data = await fetch_data(session, thread_address)
    image_data = await make_soup(thread_data)
    image_number = 1
    for img in image_data:
        temp = img.find_parent('a', href=True)['href']
        # Site-relative links are completed with the site prefix; anything
        # else is used as-is.
        if temp.startswith("/"):
            image = "https://s...content-available-to-author-only...e.com" + temp
        else:
            image = temp
        filename = f"{thread_number + 1}-{image_number}"
        try:
            await save_images(session, filename, image)
        except asyncio.CancelledError:
            # Never swallow cancellation (it is an Exception subclass on
            # Python < 3.8, so it must be handled before the generic clause).
            raise
        except Exception as exc:
            # Best effort: one failed image must not abort the whole thread,
            # but the failure is reported instead of silently dropped.
            print(f"Skipping image {image!r}: {exc!r}", file=sys.stderr)
            continue
        image_number += 1
async def main():
    """Download images for the threads listed in the first three lines of the input file.

    Each of the first (up to) three lines is treated as a thread address and
    passed to ``take_image`` together with its zero-based line index. All
    threads share one ``aiohttp.ClientSession`` and are awaited concurrently
    on the event loop that is already running this coroutine. A failure in
    one thread is reported on stderr and does not stop the others.
    """
    # Raw string: '\T' is not a valid escape sequence and triggers a warning
    # on newer Python versions; the resulting path is unchanged.
    with open(r'C:\Test.txt', 'r') as file:
        # zip() stops after three lines or at end of file, so a short file no
        # longer raises; strip() removes the trailing newline from each line.
        head = [line.strip() for _, line in zip(range(3), file)]

    # Blank lines are skipped, but each address keeps its original line index.
    threads = [(number, address) for number, address in enumerate(head) if address]

    async with aiohttp.ClientSession() as session:
        # Await on the current loop instead of creating a second loop inside a
        # running coroutine. gather() also accepts bare coroutines and an
        # empty argument list, unlike asyncio.wait().
        outcomes = await asyncio.gather(
            *(take_image(session, address, number) for number, address in threads),
            return_exceptions=True,
        )

    for (_, address), outcome in zip(threads, outcomes):
        if isinstance(outcome, Exception):
            print(f"Thread {address!r} failed: {outcome!r}", file=sys.stderr)
if __name__ == "__main__":
    # Script entry point: on Windows with Python 3.8+ (3.x only), switch to the
    # selector event loop policy before starting the event loop.
    major, minor = sys.version_info[0], sys.version_info[1]
    on_windows = sys.platform.startswith('win')
    if major == 3 and minor >= 8 and on_windows:
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)

    asyncio.run(main())