from urllib.request import urlopen, urljoin  # urljoin is also available from urllib.parse, its documented home
from bs4 import BeautifulSoup


def get_urls(url):
    """Return the topic links on one listing page and the URL of the next page (or None)."""
    topic_urls = []
    html = urlopen(url).read()
    soup = BeautifulSoup(html, 'html.parser')  # name the parser explicitly to avoid the "no parser specified" warning

    # news feed: collect every topic link on this page
    links = soup.find_all('a', class_='newsFeed_item_link')
    for link in links:
        topic_urls.append(urljoin(url, link['href']))

    # next page: follow the pagination link if one exists
    next_page = soup.find_all('li', class_='pagination_item pagination_item-next ')
    if not next_page:
        next_url = None
    else:
        next_url = urljoin(url, next_page[0].a['href'])
    return {'topic': topic_urls, 'next': next_url}


def main():
    next_url = 'https://n...content-available-to-author-only...o.jp/topics/top-picks'
    topic_urls = []
    while next_url is not None:
        print(next_url)
        urls = get_urls(next_url)
        topic_urls.extend(urls['topic'])
        next_url = urls['next']

    print(topic_urls)


if __name__ == '__main__':
    main()
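As a quick offline sanity check of the topic-link selector, the snippet below runs it against an invented piece of HTML rather than the live page. The markup and the example.invalid base URL are made up for illustration and only mirror the class name assumed in get_urls; it still needs beautifulsoup4 installed, which (as the run result below shows) this sandbox lacks.

from urllib.parse import urljoin
from bs4 import BeautifulSoup

# Invented sample markup; only the class name matches what get_urls expects.
sample = """
<ul>
  <li><a class="newsFeed_item_link" href="/articles/1">Topic 1</a></li>
  <li><a class="newsFeed_item_link" href="/articles/2">Topic 2</a></li>
</ul>
"""

soup = BeautifulSoup(sample, 'html.parser')
links = soup.find_all('a', class_='newsFeed_item_link')
print([urljoin('https://example.invalid/topics/top-picks', a['href']) for a in links])
# ['https://example.invalid/articles/1', 'https://example.invalid/articles/2']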
Runtime error (0.12s, 23660KB)
stdin: Standard input is empty
stdout: Standard output is empty
stderr:
Traceback (most recent call last):
  File "./prog.py", line 2, in <module>
ModuleNotFoundError: No module named 'bs4'
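The run fails before any request is made: the sandbox has no beautifulsoup4 package, so the import on line 2 raises ModuleNotFoundError. Installing it (pip install beautifulsoup4) is the direct fix. If that is not an option, the rough sketch below is a standard-library-only fallback for the link-collection part, assuming the same 'newsFeed_item_link' anchor class; LinkCollector and extract_topic_urls are names introduced here, and HTMLParser.feed() expects text, so the bytes from urlopen(...).read() must be decoded first.

from html.parser import HTMLParser
from urllib.parse import urljoin


class LinkCollector(HTMLParser):
    # Collect href values from <a> tags whose class list contains target_class.
    def __init__(self, target_class):
        super().__init__()
        self.target_class = target_class
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        if tag != 'a':
            return
        attrs = dict(attrs)
        classes = (attrs.get('class') or '').split()
        if self.target_class in classes and attrs.get('href'):
            self.hrefs.append(attrs['href'])


def extract_topic_urls(html_text, base_url):
    # html_text must be a str, e.g. urlopen(url).read().decode('utf-8').
    collector = LinkCollector('newsFeed_item_link')
    collector.feed(html_text)
    return [urljoin(base_url, href) for href in collector.hrefs]

This would replace only the news-feed lookup in get_urls; finding the next-page link would need a similar handler for the pagination <li> element.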